PageRenderTime 58ms CodeModel.GetById 2ms app.highlight 48ms RepoModel.GetById 2ms app.codeStats 0ms

/tools/rgenetics/rgfakePed.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 537 lines | 487 code | 12 blank | 38 comment | 30 complexity | ac0932ce8802457d2bf0d598f16e9b3e MD5 | raw file
  1# modified may 2011 to name components (map/ped) as RgeneticsData to align with default base_name
  2# otherwise downstream tools fail
  3# modified march  2011 to remove post execution hook  
  4# pedigree data faker
  5# specifically designed for scalability testing of
  6# Shaun Purcel's PLINK package
  7# derived from John Ziniti's original suggestion
  8# allele frequency spectrum and random mating added
  9# ross lazarus me fecit january 13 2007
 10# copyright ross lazarus 2007
 11# without psyco
 12# generates about 10k snp genotypes in 2k subjects (666 trios) per minute or so.
 13# so 500k (a billion genotypes), at about 4 trios/min will a couple of hours to generate
 14# psyco makes it literally twice as quick!!
 15# all rights reserved except as granted under the terms of the LGPL
 16# see http://www.gnu.org/licenses/lgpl.html 
 17# for a copy of the license you receive with this software
 18# and for your rights and obligations
 19# especially if you wish to modify or redistribute this code
 20# january 19 added random missingness inducer
 21# currently about 15M genos/minute without psyco, 30M/minute with
 22# so a billion genos should take about 40 minutes with psyco or 80 without...
 23# added mendel error generator jan 23 rml
 24
 25
 26import random,sys,time,os,string
 27
 28from optparse import OptionParser
 29
 30defbasename="RgeneticsData"    
 31width = 500000
 32ALLELES = ['1','2','3','4']
 33prog = os.path.split(sys.argv[0])[-1]
 34debug = 0
 35
 36"""Natural-order sorting, supporting embedded numbers.
 37# found at http://lists.canonical.org/pipermail/kragen-hacks/2005-October/000419.html
 38note test code there removed to conserve brain space
 39foo9bar2 < foo10bar2 < foo10bar10
 40
 41"""
 42import random, re, sys
 43
 44def natsort_key(item): 
 45    chunks = re.split('(\d+(?:\.\d+)?)', item)
 46    for ii in range(len(chunks)):
 47        if chunks[ii] and chunks[ii][0] in '0123456789':
 48            if '.' in chunks[ii]: numtype = float
 49            else: numtype = int
 50            # wrap in tuple with '0' to explicitly specify numbers come first
 51            chunks[ii] = (0, numtype(chunks[ii]))
 52        else:
 53            chunks[ii] = (1, chunks[ii])
 54    return (chunks, item)
 55
 56def natsort(seq):
 57    "Sort a sequence of text strings in a reasonable order."
 58    alist = [item for item in seq]
 59    alist.sort(key=natsort_key)
 60    return alist
 61
 62
 63def makeUniformMAFdist(low=0.02, high=0.5):
 64    """Fake a non-uniform maf distribution to make the data
 65    more interesting. Provide uniform 0.02-0.5 distribution"""
 66    MAFdistribution = []
 67    for i in xrange(int(100*low),int(100*high)+1):
 68       freq = i/100.0 # uniform
 69       MAFdistribution.append(freq)
 70    return MAFdistribution
 71
 72def makeTriangularMAFdist(low=0.02, high=0.5, beta=5):
 73    """Fake a non-uniform maf distribution to make the data
 74    more interesting - more rare alleles """
 75    MAFdistribution = []
 76    for i in xrange(int(100*low),int(100*high)+1):
 77       freq = (51 - i)/100.0 # large numbers of small allele freqs
 78       for j in range(beta*i): # or i*i for crude exponential distribution 
 79            MAFdistribution.append(freq)
 80    return MAFdistribution
 81
 82def makeFbathead(rslist=[], chromlist=[], poslist=[], width=100000):
 83    """header row
 84    """
 85    res = ['%s_%s_%s' % (chromlist[x], poslist[x], rslist[x]) for x in range(len(rslist))]
 86    return ' '.join(res)
 87
 88def makeMap( width=500000, MAFdistribution=[], useGP=False):
 89    """make snp allele and frequency tables for consistent generation"""
 90    usegp = 1
 91    snpdb = 'snp126'
 92    hgdb = 'hg18'
 93    alleles = []
 94    freqs = []
 95    rslist = []
 96    chromlist = []
 97    poslist = []
 98    for snp in range(width):
 99        random.shuffle(ALLELES)
100        alleles.append(ALLELES[0:2]) # need two DIFFERENT alleles!
101        freqs.append(random.choice(MAFdistribution)) # more rare alleles
102    if useGP:
103        try:
104            import MySQLdb
105            genome = MySQLdb.Connect('localhost', 'hg18', 'G3gn0m3')
106            curs = genome.cursor() # use default cursor
107        except:
108            if debug:
109                print 'cannot connect to local copy of golden path'
110            usegp = 0
111    if usegp and useGP: # urrrgghh getting snps into chrom offset order is complicated....
112        curs.execute('use %s' % hgdb)
113        print 'Collecting %d real rs numbers - this may take a while' % width
114        # get a random draw of enough reasonable (hapmap) snps with frequency data
115        s = '''select distinct chrom,chromEnd, name from %s where avHet > 0 and chrom not like '%%random'
116        group by name order by rand() limit %d''' % (snpdb,width)
117        curs.execute(s)
118        reslist = curs.fetchall()
119        reslist = ['%s\t%09d\t%s' % (x[3:],y,z) for x,y,z in reslist] # get rid of chr
120        reslist = natsort(reslist)
121        for s in reslist:
122            chrom,pos,rs = s.split('\t')
123            rslist.append(rs)
124            chromlist.append(chrom)
125            poslist.append(pos)
126    else:
127        chrom = '1'
128        for snp in range(width):
129            pos = '%d' % (1000*snp)
130            rs = 'rs00%d' % snp
131            rslist.append(rs)
132            chromlist.append(chrom)
133            poslist.append(pos)
134    return alleles,freqs, rslist, chromlist, poslist
135
136def writeMap(fprefix = '', fpath='./', rslist=[], chromlist=[], poslist=[], width = 500000):
137    """make a faked plink compatible map file - fbat files
138    have the map written as a header line"""
139    outf = '%s.map'% (fprefix)
140    outf = os.path.join(fpath,outf)
141    amap = open(outf, 'w')
142    res = ['%s\t%s\t0\t%s' % (chromlist[x],rslist[x],poslist[x]) for x in range(len(rslist))]
143    res.append('')
144    amap.write('\n'.join(res))
145    amap.close()
146
147def makeMissing(genos=[], missrate = 0.03, missval = '0'):
148    """impose some random missingness"""
149    nsnps = len(genos)
150    for snp in range(nsnps): # ignore first 6 columns
151        if random.random() <= missrate:
152            genos[snp] = '%s %s' % (missval,missval)
153    return genos
154
155def makeTriomissing(genos=[], missrate = 0.03, missval = '0'):
156    """impose some random missingness on a trio - moth eaten like real data"""
157    for person in (0,1):
158        nsnps = len(genos[person])
159        for snp in range(nsnps):
160            for person in [0,1,2]:
161                if random.random() <= missrate:
162                    genos[person][snp] = '%s %s' % (missval,missval)
163    return genos
164
165
166def makeTriomendel(p1g=(0,0),p2g=(0,0), kiddip = (0,0)):
167    """impose some random mendels on a trio
168    there are 8 of the 9 mating types we can simulate reasonable errors for
169    Note, since random mating dual het parents can produce any genotype we can't generate an interesting
170    error for them, so the overall mendel rate will be lower than mendrate, depending on
171    allele frequency..."""
172    if p1g[0] <> p1g[1] and p2g[0] <> p2g[1]: # both parents het
173            return kiddip # cannot simulate a mendel error - anything is legal!
174    elif (p1g[0] <> p1g[1]): # p1 is het parent so p2 must be hom
175        if p2g[0] == 0: # - make child p2 opposite hom for error
176            kiddip = (1,1)
177        else:
178            kiddip = (0,0)
179    elif (p2g[0] <> p2g[1]): # p2 is het parent so p1 must be hom
180        if p1g[0] == 0: # - make child p1 opposite hom for error
181            kiddip = (1,1)
182        else:
183            kiddip = (0,0)
184    elif (p1g[0] == p1g[1]): # p1 is hom parent and if we get here p2 must also be hom
185        if p1g[0] == p2g[0]: # both parents are same hom - make child either het or opposite hom for error
186            if random.random() <= 0.5:
187                kiddip = (0,1)
188            else:
189                if p1g[0] == 0:
190                    kiddip = (1,1)
191                else:
192                    kiddip = (0,0)
193        else: # parents are opposite hom - return any hom as an error
194            if random.random() <= 0.5:
195                kiddip = (0,0)
196            else:
197                kiddip = (1,1)
198    return kiddip
199            
200            
201
202
203def makeFam(width=100, freqs={}, alleles={}, trio=1, missrate=0.03, missval='0', mendrate=0.0):
204    """this family is a simple trio, constructed by random mating two random genotypes
205    TODO: why not generate from chromosomes - eg hapmap
206    set each haplotype locus according to the conditional
207    probability implied by the surrounding loci - eg use both neighboring pairs, triplets
208    and quads as observed in hapmap ceu"""
209    dadped = '%d 1 0 0 1 1 %s'
210    mumped = '%d 2 0 0 2 1 %s' # a mother is a mum where I come from :)
211    kidped = '%d 3 1 2 %d %d %s'
212    family = [] # result accumulator
213    sex = random.choice((1,2)) # for the kid
214    affected = random.choice((1,2))
215    genos = [[],[],[]] # dad, mum, kid - 0/1 for common,rare initially, then xform to alleles
216    # parent1...kidn lists of 0/1 for common,rare initially, then xformed to alleles
217    for snp in xrange(width):
218        f = freqs[snp]           
219        for i in range(2): # do dad and mum
220            p = random.random()
221            a1 = a2 = 0
222            if p <= f: # a rare allele
223               a1 = 1
224            p = random.random()
225            if p <= f: # a rare allele
226               a2 = 1
227            if a1 > a2:
228                a1,a2 = a2,a1 # so ordering consistent - 00,01,11
229            dip = (a1,a2)
230            genos[i].append(dip) # tuples of 0,1
231        a1 = random.choice(genos[0][snp]) # dad gamete  
232        a2 = random.choice(genos[1][snp]) # mum gamete
233        if a1 > a2:
234            a1,a2 = a2,a1 # so ordering consistent - 00,01,11
235        kiddip = (a1,a2) # NSFW mating!
236        genos[2].append(kiddip)
237        if mendrate > 0:
238            if random.random() <= mendrate:
239                genos[2][snp] = makeTriomendel(genos[0][snp],genos[1][snp], kiddip)
240        achoice = alleles[snp]
241        for g in genos: # now convert to alleles using allele dict
242          a1 = achoice[g[snp][0]] # get allele letter
243          a2 = achoice[g[snp][1]]              
244          g[snp] = '%s %s' % (a1,a2)
245    if missrate > 0:
246        genos = makeTriomissing(genos=genos,missrate=missrate, missval=missval)
247    family.append(dadped % (trio,' '.join(genos[0]))) # create a row for each member of trio
248    family.append(mumped % (trio,' '.join(genos[1])))
249    family.append(kidped % (trio,sex,affected,' '.join(genos[2])))
250    return family
251
252def makePerson(width=100, aff=1, freqs={}, alleles={}, id=1, missrate = 0.03, missval='0'):
253    """make an entire genotype vector for an independent subject"""
254    sex = random.choice((1,2))
255    if not aff:
256        aff = random.choice((1,2))
257    genos = [] #0/1 for common,rare initially, then xform to alleles
258    family = []
259    personped = '%d 1 0 0 %d %d %s'
260    poly = (0,1)
261    for snp in xrange(width):
262        achoice = alleles[snp]
263        f = freqs[snp]
264        p = random.random()
265        a1 = a2 = 0
266        if p <= f: # a rare allele
267           a1 = 1
268        p = random.random()
269        if p <= f: # a rare allele
270           a2 = 1
271        if a1 > a2:
272            a1,a2 = a2,a1 # so ordering consistent - 00,01,11
273        a1 = achoice[a1] # get allele letter
274        a2 = achoice[a2]
275        g = '%s %s' % (a1,a2)
276        genos.append(g)
277    if missrate > 0.0:
278        genos = makeMissing(genos=genos,missrate=missrate, missval=missval)
279    family.append(personped % (id,sex,aff,' '.join(genos)))
280    return family
281
282def makeHapmap(fprefix= 'fakebigped',width=100, aff=[], freqs={},
283               alleles={}, nsubj = 2000, trios = True, mendrate=0.03, missrate = 0.03, missval='0'):
284    """ fake a hapmap file and a pedigree file for eg haploview
285    this is arranged as the transpose of a ped file - cols are subjects, rows are markers
286    so we need to generate differently since we can't do the transpose in ram reliably for
287    a few billion genotypes...
288    """
289    outheadprefix = 'rs# alleles chrom pos strand assembly# center protLSID assayLSID panelLSID QCcode %s'
290    cfake5 = ["illumina","urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1", 
291"urn:LSID:illumina.hapmap.org:Assay:27741:1","urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1","QC+"]
292    yfake5 = ["illumina","urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1", 
293"urn:LSID:illumina.hapmap.org:Assay:27741:1","urn:LSID:dcc.hapmap.org:Panel:Yoruba-30-trios:1","QC+"]
294    sampids = ids
295    if trios:
296        ts = '%d trios' % int(nsubj/3.)
297    else:
298        ts = '%d unrelated subjects' % nsubj
299    res = ['#%s fake hapmap file %d snps and %s, faked by %s' % (timenow(), width, ts, prog),]
300    res.append('# ross lazarus me fecit')
301    res.append(outheadprefix % ' '.join(sampids)) # make a header compatible with hapmap extracts
302    outf = open('%s.hmap' % (fprefix), 'w')
303    started = time.time()
304    if trios:
305        ntrios = int(nsubj/3.)
306        for n in ntrios: # each is a dict
307            row = copy.copy(cfake5) # get first fields
308            row = map(str,row)
309            if race == "YRI":
310                row += yfake5
311            elif race == 'CEU':
312                row += cfake5
313            else:
314                row += ['NA' for x in range(5)] # 5 dummy fields = center protLSID assayLSID panelLSID QCcode
315            row += [''.join(sorted(line[x])) for x in sampids] # the genotypes in header (sorted) sample id order
316            res.append(' '.join(row))
317    res.append('')
318    outfname = '%s_%s_%s_%dkb.geno' % (gene,probeid,race,2*flank/1000)
319    f = file(outfname,'w')
320    f.write('\n'.join(res))
321    f.close()
322    print '### %s: Wrote %d lines to %s' % (timenow(), len(res),outfname)
323     
324
325def makePed(fprefix= 'fakebigped', fpath='./',
326            width=500000, nsubj=2000, MAFdistribution=[],alleles={},
327            freqs={}, fbatstyle=True, mendrate = 0.0, missrate = 0.03, missval='0',fbathead=''):
328    """fake trios with mendel consistent random mating genotypes in offspring
329    with consistent alleles and MAFs for the sample"""
330    res = []
331    if fbatstyle: # add a header row with the marker names
332        res.append(fbathead) # header row for fbat
333    outfname = '%s.ped'% (fprefix)
334    outfname = os.path.join(fpath,outfname)
335    outf = open(outfname,'w')
336    ntrios = int(nsubj/3.)
337    outf = open(outfile, 'w')
338    started = time.time()
339    for trio in xrange(ntrios):
340        family = makeFam(width=width, freqs=freqs, alleles=alleles, trio=trio,
341                         missrate = missrate, mendrate=mendrate, missval=missval)
342        res += family
343        if (trio + 1) % 10 == 0: # write out to keep ram requirements reasonable
344            if (trio + 1) % 50 == 0: # show progress
345                dur = time.time() - started
346                if dur == 0:
347                    dur = 1.0
348                print 'Trio: %d, %4.1f genos/sec at %6.1f sec' % (trio + 1, width*trio*3/dur, dur)
349            outf.write('\n'.join(res))
350            outf.write('\n')
351            res = []
352    if len(res) > 0: # some left
353        outf.write('\n'.join(res))
354    outf.write('\n')
355    outf.close()
356    if debug:
357        print '##makeped : %6.1f seconds total runtime' % (time.time() - started)
358
359def makeIndep(fprefix = 'fakebigped', fpath='./',
360              width=500000, Nunaff=1000, Naff=1000, MAFdistribution=[],
361              alleles={}, freqs={}, fbatstyle=True, missrate = 0.03, missval='0',fbathead=''):
362    """fake a random sample from a random mating sample
363    with consistent alleles and MAFs"""
364    res = []
365    Ntot = Nunaff + Naff
366    status = [1,]*Nunaff
367    status += [2,]*Nunaff
368    outf = '%s.ped' % (fprefix)
369    outf = os.path.join(fpath,outf)
370    outf = open(outf, 'w')
371    started = time.time()
372    #sample = personMaker(width=width, affs=status, freqs=freqs, alleles=alleles, Ntomake=Ntot)
373    if fbatstyle: # add a header row with the marker names
374        res.append(fbathead) # header row for fbat
375    for id in xrange(Ntot):
376        if id < Nunaff:
377            aff = 1
378        else:
379            aff = 2
380        family = makePerson(width=width, aff=aff, freqs=freqs, alleles=alleles, id=id+1)
381        res += family
382        if (id % 50 == 0): # write out to keep ram requirements reasonable
383            if (id % 200 == 0): # show progress
384                dur = time.time() - started
385                if dur == 0:
386                    dur = 1.0
387                print 'Id: %d, %4.1f genos/sec at %6.1f sec' % (id, width*id/dur, dur)
388            outf.write('\n'.join(res))
389            outf.write('\n')
390            res = []
391    if len(res) > 0: # some left
392        outf.write('\n'.join(res))
393    outf.write('\n')
394    outf.close()
395    print '## makeindep: %6.1f seconds total runtime' % (time.time() - started)
396
397u = """
398Generate either trios or independent subjects with a prespecified
399number of random alleles and a uniform or triangular MAF distribution for
400stress testing. No LD is simulated - alleles are random. Offspring for
401trios are generated by random mating the random parental alleles so there are
402no Mendelian errors unless the -M option is used. Mendelian errors are generated
403randomly according to the possible errors given the parental mating type although
404this is fresh code and not guaranteed to work quite right yet - comments welcomed
405
406Enquiries to ross.lazarus@gmail.com
407
408eg to generate 700 trios with 500k snps, use:
409fakebigped.py -n 2100 -s 500000
410or to generate 500 independent cases and 500 controls with 100k snps and 0.02 missingness (MCAR), use:
411fakebigped.py -c 500 -n 1000 -s 100000 -m 0.02
412
413fakebigped.py -o myfake -m 0.05 -s 100000 -n 2000
414will make fbat compatible myfake.ped with 100k markers in
415666 trios (total close to 2000 subjects), a uniform MAF distribution and about 5% MCAR missing
416
417fakebigped.py -o myfake -m 0.05 -s 100000 -n 2000 -M 0.05
418will make fbat compatible myfake.ped with 100k markers in
419666 trios (total close to 2000 subjects), a uniform MAF distribution,
420about 5% Mendelian errors and about 5% MCAR missing
421
422
423fakebigped.py  -o myfakecc -m 0.05 -s 100000 -n 2000 -c 1000 -l
424will make plink compatible myfakecc.ped and myfakecc.map (that's what the -l option does),
425with 100k markers in 1000 cases and 1000 controls (affection status 2 and 1 respectively),
426a triangular MAF distribution (more rare alleles) and about 5% MCAR missing
427
428You should see about 1/4 million genotypes/second so about an hour for a
429500k snps in 2k subjects and about a 4GB ped file - these are BIG!!
430
431"""
432
433import sys, os, glob
434
435galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
436<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
437<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
438<head>
439<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
440<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
441<title></title>
442<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
443</head>
444<body>
445<div class="document">
446"""
447
448
449def doImport(outfile=None,outpath=None):
450    """ import into one of the new html composite data types for Rgenetics
451        Dan Blankenberg with mods by Ross Lazarus 
452        October 2007
453    """
454    flist = glob.glob(os.path.join(outpath,'*'))
455    outf = open(outfile,'w')
456    outf.write(galhtmlprefix % prog)
457    for i, data in enumerate( flist ):
458        outf.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1]))
459    outf.write('<br><h3>This is simulated null genotype data generated by Rgenetics!</h3>')
460    outf.write('%s called with command line:<br><pre>' % prog)
461    outf.write(' '.join(sys.argv))
462    outf.write('\n</pre>\n')
463    outf.write("</div></body></html>")
464    outf.close()
465
466
467
468if __name__ == "__main__":
469    """
470    """
471    parser = OptionParser(usage=u, version="%prog 0.01")
472    a = parser.add_option
473    a("-n","--nsubjects",type="int",dest="Ntot",
474      help="nsubj: total number of subjects",default=2000)
475    a("-t","--title",dest="title",
476      help="title: file basename for outputs",default='fakeped')
477    a("-c","--cases",type="int",dest="Naff",
478      help="number of cases: independent subjects with status set to 2 (ie cases). If not set, NTOT/3 trios will be generated", default = 0)
479    a("-s","--snps",dest="width",type="int",
480      help="snps: total number of snps per subject", default=1000)
481    a("-d","--distribution",dest="MAFdist",default="Uniform",
482      help="MAF distribution - default is Uniform, can be Triangular")
483    a("-o","--outf",dest="outf",
484      help="Output file", default = 'fakeped')
485    a("-p","--outpath",dest="outpath",
486      help="Path for output files", default = './')
487    a("-l","--pLink",dest="outstyle", default='L',
488      help="Ped files as for Plink - no header, separate Map file - default is Plink style")
489    a("-w","--loWmaf", type="float", dest="lowmaf", default=0.01, help="Lower limit for SNP MAF (minor allele freq)")
490    a("-m","--missing",dest="missrate",type="float",
491      help="missing: probability of missing MCAR - default 0.0", default=0.0)
492    a("-v","--valmiss",dest="missval",
493      help="missing character: Missing allele code - usually 0 or N - default 0", default="0")
494    a("-M","--Mendelrate",dest="mendrate",type="float",
495      help="Mendelian error rate: probability of a mendel error per trio, default=0.0", default=0.0)   
496    a("-H","--noHGRS",dest="useHG",type="int",
497      help="Use local copy of UCSC snp126 database to generate real rs numbers", default=True)
498    (options,args) = parser.parse_args()
499    low = options.lowmaf
500    try:
501        os.makedirs(options.outpath)
502    except:
503        pass
504    if options.MAFdist.upper() == 'U':
505        mafDist = makeUniformMAFdist(low=low, high=0.5)
506    else:
507        mafDist = makeTriangularMAFdist(low=low, high=0.5, beta=5)
508    alleles,freqs, rslist, chromlist, poslist = makeMap(width=int(options.width),
509                                        MAFdistribution=mafDist, useGP=False)
510    fbathead = []
511    s = string.whitespace+string.punctuation
512    trantab = string.maketrans(s,'_'*len(s))
513    title = string.translate(options.title,trantab)
514    
515    if options.outstyle == 'F':
516        fbatstyle = True
517        fbathead = makeFbathead(rslist=rslist, chromlist=chromlist, poslist=poslist, width=options.width)
518    else:
519        fbatstyle = False
520        writeMap(fprefix=defbasename, rslist=rslist, fpath=options.outpath,
521                 chromlist=chromlist, poslist=poslist, width=options.width)
522    if options.Naff > 0: # make case control data
523        makeIndep(fprefix = defbasename, fpath=options.outpath,
524                  width=options.width, Nunaff=options.Ntot-options.Naff,
525                  Naff=options.Naff, MAFdistribution=mafDist,alleles=alleles, freqs=freqs,
526                  fbatstyle=fbatstyle, missrate=options.missrate, missval=options.missval,
527                  fbathead=fbathead)
528    else:
529        makePed(fprefix=defbasename, fpath=options.fpath,
530            width=options.width, MAFdistribution=mafDist, nsubj=options.Ntot,
531            alleles=alleles, freqs=freqs, fbatstyle=fbatstyle, missrate=options.missrate,
532            mendrate=options.mendrate, missval=options.missval,
533                  fbathead=fbathead)
534    doImport(outfile=options.outf,outpath=options.outpath)
535
536
537