/tools/rgenetics/rgfakePed.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 537 lines · 450 code · 32 blank · 55 comment · 66 complexity · ac0932ce8802457d2bf0d598f16e9b3e MD5 · raw file

  1. # modified may 2011 to name components (map/ped) as RgeneticsData to align with default base_name
  2. # otherwise downstream tools fail
  3. # modified march 2011 to remove post execution hook
  4. # pedigree data faker
  5. # specifically designed for scalability testing of
  6. # Shaun Purcel's PLINK package
  7. # derived from John Ziniti's original suggestion
  8. # allele frequency spectrum and random mating added
  9. # ross lazarus me fecit january 13 2007
  10. # copyright ross lazarus 2007
  11. # without psyco
  12. # generates about 10k snp genotypes in 2k subjects (666 trios) per minute or so.
  13. # so 500k (a billion genotypes), at about 4 trios/min will a couple of hours to generate
  14. # psyco makes it literally twice as quick!!
  15. # all rights reserved except as granted under the terms of the LGPL
  16. # see http://www.gnu.org/licenses/lgpl.html
  17. # for a copy of the license you receive with this software
  18. # and for your rights and obligations
  19. # especially if you wish to modify or redistribute this code
  20. # january 19 added random missingness inducer
  21. # currently about 15M genos/minute without psyco, 30M/minute with
  22. # so a billion genos should take about 40 minutes with psyco or 80 without...
  23. # added mendel error generator jan 23 rml
  24. import random,sys,time,os,string
  25. from optparse import OptionParser
  26. defbasename="RgeneticsData"
  27. width = 500000
  28. ALLELES = ['1','2','3','4']
  29. prog = os.path.split(sys.argv[0])[-1]
  30. debug = 0
  31. """Natural-order sorting, supporting embedded numbers.
  32. # found at http://lists.canonical.org/pipermail/kragen-hacks/2005-October/000419.html
  33. note test code there removed to conserve brain space
  34. foo9bar2 < foo10bar2 < foo10bar10
  35. """
  36. import random, re, sys
  37. def natsort_key(item):
  38. chunks = re.split('(\d+(?:\.\d+)?)', item)
  39. for ii in range(len(chunks)):
  40. if chunks[ii] and chunks[ii][0] in '0123456789':
  41. if '.' in chunks[ii]: numtype = float
  42. else: numtype = int
  43. # wrap in tuple with '0' to explicitly specify numbers come first
  44. chunks[ii] = (0, numtype(chunks[ii]))
  45. else:
  46. chunks[ii] = (1, chunks[ii])
  47. return (chunks, item)
  48. def natsort(seq):
  49. "Sort a sequence of text strings in a reasonable order."
  50. alist = [item for item in seq]
  51. alist.sort(key=natsort_key)
  52. return alist
  53. def makeUniformMAFdist(low=0.02, high=0.5):
  54. """Fake a non-uniform maf distribution to make the data
  55. more interesting. Provide uniform 0.02-0.5 distribution"""
  56. MAFdistribution = []
  57. for i in xrange(int(100*low),int(100*high)+1):
  58. freq = i/100.0 # uniform
  59. MAFdistribution.append(freq)
  60. return MAFdistribution
  61. def makeTriangularMAFdist(low=0.02, high=0.5, beta=5):
  62. """Fake a non-uniform maf distribution to make the data
  63. more interesting - more rare alleles """
  64. MAFdistribution = []
  65. for i in xrange(int(100*low),int(100*high)+1):
  66. freq = (51 - i)/100.0 # large numbers of small allele freqs
  67. for j in range(beta*i): # or i*i for crude exponential distribution
  68. MAFdistribution.append(freq)
  69. return MAFdistribution
  70. def makeFbathead(rslist=[], chromlist=[], poslist=[], width=100000):
  71. """header row
  72. """
  73. res = ['%s_%s_%s' % (chromlist[x], poslist[x], rslist[x]) for x in range(len(rslist))]
  74. return ' '.join(res)
  75. def makeMap( width=500000, MAFdistribution=[], useGP=False):
  76. """make snp allele and frequency tables for consistent generation"""
  77. usegp = 1
  78. snpdb = 'snp126'
  79. hgdb = 'hg18'
  80. alleles = []
  81. freqs = []
  82. rslist = []
  83. chromlist = []
  84. poslist = []
  85. for snp in range(width):
  86. random.shuffle(ALLELES)
  87. alleles.append(ALLELES[0:2]) # need two DIFFERENT alleles!
  88. freqs.append(random.choice(MAFdistribution)) # more rare alleles
  89. if useGP:
  90. try:
  91. import MySQLdb
  92. genome = MySQLdb.Connect('localhost', 'hg18', 'G3gn0m3')
  93. curs = genome.cursor() # use default cursor
  94. except:
  95. if debug:
  96. print 'cannot connect to local copy of golden path'
  97. usegp = 0
  98. if usegp and useGP: # urrrgghh getting snps into chrom offset order is complicated....
  99. curs.execute('use %s' % hgdb)
  100. print 'Collecting %d real rs numbers - this may take a while' % width
  101. # get a random draw of enough reasonable (hapmap) snps with frequency data
  102. s = '''select distinct chrom,chromEnd, name from %s where avHet > 0 and chrom not like '%%random'
  103. group by name order by rand() limit %d''' % (snpdb,width)
  104. curs.execute(s)
  105. reslist = curs.fetchall()
  106. reslist = ['%s\t%09d\t%s' % (x[3:],y,z) for x,y,z in reslist] # get rid of chr
  107. reslist = natsort(reslist)
  108. for s in reslist:
  109. chrom,pos,rs = s.split('\t')
  110. rslist.append(rs)
  111. chromlist.append(chrom)
  112. poslist.append(pos)
  113. else:
  114. chrom = '1'
  115. for snp in range(width):
  116. pos = '%d' % (1000*snp)
  117. rs = 'rs00%d' % snp
  118. rslist.append(rs)
  119. chromlist.append(chrom)
  120. poslist.append(pos)
  121. return alleles,freqs, rslist, chromlist, poslist
  122. def writeMap(fprefix = '', fpath='./', rslist=[], chromlist=[], poslist=[], width = 500000):
  123. """make a faked plink compatible map file - fbat files
  124. have the map written as a header line"""
  125. outf = '%s.map'% (fprefix)
  126. outf = os.path.join(fpath,outf)
  127. amap = open(outf, 'w')
  128. res = ['%s\t%s\t0\t%s' % (chromlist[x],rslist[x],poslist[x]) for x in range(len(rslist))]
  129. res.append('')
  130. amap.write('\n'.join(res))
  131. amap.close()
  132. def makeMissing(genos=[], missrate = 0.03, missval = '0'):
  133. """impose some random missingness"""
  134. nsnps = len(genos)
  135. for snp in range(nsnps): # ignore first 6 columns
  136. if random.random() <= missrate:
  137. genos[snp] = '%s %s' % (missval,missval)
  138. return genos
  139. def makeTriomissing(genos=[], missrate = 0.03, missval = '0'):
  140. """impose some random missingness on a trio - moth eaten like real data"""
  141. for person in (0,1):
  142. nsnps = len(genos[person])
  143. for snp in range(nsnps):
  144. for person in [0,1,2]:
  145. if random.random() <= missrate:
  146. genos[person][snp] = '%s %s' % (missval,missval)
  147. return genos
  148. def makeTriomendel(p1g=(0,0),p2g=(0,0), kiddip = (0,0)):
  149. """impose some random mendels on a trio
  150. there are 8 of the 9 mating types we can simulate reasonable errors for
  151. Note, since random mating dual het parents can produce any genotype we can't generate an interesting
  152. error for them, so the overall mendel rate will be lower than mendrate, depending on
  153. allele frequency..."""
  154. if p1g[0] <> p1g[1] and p2g[0] <> p2g[1]: # both parents het
  155. return kiddip # cannot simulate a mendel error - anything is legal!
  156. elif (p1g[0] <> p1g[1]): # p1 is het parent so p2 must be hom
  157. if p2g[0] == 0: # - make child p2 opposite hom for error
  158. kiddip = (1,1)
  159. else:
  160. kiddip = (0,0)
  161. elif (p2g[0] <> p2g[1]): # p2 is het parent so p1 must be hom
  162. if p1g[0] == 0: # - make child p1 opposite hom for error
  163. kiddip = (1,1)
  164. else:
  165. kiddip = (0,0)
  166. elif (p1g[0] == p1g[1]): # p1 is hom parent and if we get here p2 must also be hom
  167. if p1g[0] == p2g[0]: # both parents are same hom - make child either het or opposite hom for error
  168. if random.random() <= 0.5:
  169. kiddip = (0,1)
  170. else:
  171. if p1g[0] == 0:
  172. kiddip = (1,1)
  173. else:
  174. kiddip = (0,0)
  175. else: # parents are opposite hom - return any hom as an error
  176. if random.random() <= 0.5:
  177. kiddip = (0,0)
  178. else:
  179. kiddip = (1,1)
  180. return kiddip
  181. def makeFam(width=100, freqs={}, alleles={}, trio=1, missrate=0.03, missval='0', mendrate=0.0):
  182. """this family is a simple trio, constructed by random mating two random genotypes
  183. TODO: why not generate from chromosomes - eg hapmap
  184. set each haplotype locus according to the conditional
  185. probability implied by the surrounding loci - eg use both neighboring pairs, triplets
  186. and quads as observed in hapmap ceu"""
  187. dadped = '%d 1 0 0 1 1 %s'
  188. mumped = '%d 2 0 0 2 1 %s' # a mother is a mum where I come from :)
  189. kidped = '%d 3 1 2 %d %d %s'
  190. family = [] # result accumulator
  191. sex = random.choice((1,2)) # for the kid
  192. affected = random.choice((1,2))
  193. genos = [[],[],[]] # dad, mum, kid - 0/1 for common,rare initially, then xform to alleles
  194. # parent1...kidn lists of 0/1 for common,rare initially, then xformed to alleles
  195. for snp in xrange(width):
  196. f = freqs[snp]
  197. for i in range(2): # do dad and mum
  198. p = random.random()
  199. a1 = a2 = 0
  200. if p <= f: # a rare allele
  201. a1 = 1
  202. p = random.random()
  203. if p <= f: # a rare allele
  204. a2 = 1
  205. if a1 > a2:
  206. a1,a2 = a2,a1 # so ordering consistent - 00,01,11
  207. dip = (a1,a2)
  208. genos[i].append(dip) # tuples of 0,1
  209. a1 = random.choice(genos[0][snp]) # dad gamete
  210. a2 = random.choice(genos[1][snp]) # mum gamete
  211. if a1 > a2:
  212. a1,a2 = a2,a1 # so ordering consistent - 00,01,11
  213. kiddip = (a1,a2) # NSFW mating!
  214. genos[2].append(kiddip)
  215. if mendrate > 0:
  216. if random.random() <= mendrate:
  217. genos[2][snp] = makeTriomendel(genos[0][snp],genos[1][snp], kiddip)
  218. achoice = alleles[snp]
  219. for g in genos: # now convert to alleles using allele dict
  220. a1 = achoice[g[snp][0]] # get allele letter
  221. a2 = achoice[g[snp][1]]
  222. g[snp] = '%s %s' % (a1,a2)
  223. if missrate > 0:
  224. genos = makeTriomissing(genos=genos,missrate=missrate, missval=missval)
  225. family.append(dadped % (trio,' '.join(genos[0]))) # create a row for each member of trio
  226. family.append(mumped % (trio,' '.join(genos[1])))
  227. family.append(kidped % (trio,sex,affected,' '.join(genos[2])))
  228. return family
  229. def makePerson(width=100, aff=1, freqs={}, alleles={}, id=1, missrate = 0.03, missval='0'):
  230. """make an entire genotype vector for an independent subject"""
  231. sex = random.choice((1,2))
  232. if not aff:
  233. aff = random.choice((1,2))
  234. genos = [] #0/1 for common,rare initially, then xform to alleles
  235. family = []
  236. personped = '%d 1 0 0 %d %d %s'
  237. poly = (0,1)
  238. for snp in xrange(width):
  239. achoice = alleles[snp]
  240. f = freqs[snp]
  241. p = random.random()
  242. a1 = a2 = 0
  243. if p <= f: # a rare allele
  244. a1 = 1
  245. p = random.random()
  246. if p <= f: # a rare allele
  247. a2 = 1
  248. if a1 > a2:
  249. a1,a2 = a2,a1 # so ordering consistent - 00,01,11
  250. a1 = achoice[a1] # get allele letter
  251. a2 = achoice[a2]
  252. g = '%s %s' % (a1,a2)
  253. genos.append(g)
  254. if missrate > 0.0:
  255. genos = makeMissing(genos=genos,missrate=missrate, missval=missval)
  256. family.append(personped % (id,sex,aff,' '.join(genos)))
  257. return family
  258. def makeHapmap(fprefix= 'fakebigped',width=100, aff=[], freqs={},
  259. alleles={}, nsubj = 2000, trios = True, mendrate=0.03, missrate = 0.03, missval='0'):
  260. """ fake a hapmap file and a pedigree file for eg haploview
  261. this is arranged as the transpose of a ped file - cols are subjects, rows are markers
  262. so we need to generate differently since we can't do the transpose in ram reliably for
  263. a few billion genotypes...
  264. """
  265. outheadprefix = 'rs# alleles chrom pos strand assembly# center protLSID assayLSID panelLSID QCcode %s'
  266. cfake5 = ["illumina","urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1",
  267. "urn:LSID:illumina.hapmap.org:Assay:27741:1","urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1","QC+"]
  268. yfake5 = ["illumina","urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1",
  269. "urn:LSID:illumina.hapmap.org:Assay:27741:1","urn:LSID:dcc.hapmap.org:Panel:Yoruba-30-trios:1","QC+"]
  270. sampids = ids
  271. if trios:
  272. ts = '%d trios' % int(nsubj/3.)
  273. else:
  274. ts = '%d unrelated subjects' % nsubj
  275. res = ['#%s fake hapmap file %d snps and %s, faked by %s' % (timenow(), width, ts, prog),]
  276. res.append('# ross lazarus me fecit')
  277. res.append(outheadprefix % ' '.join(sampids)) # make a header compatible with hapmap extracts
  278. outf = open('%s.hmap' % (fprefix), 'w')
  279. started = time.time()
  280. if trios:
  281. ntrios = int(nsubj/3.)
  282. for n in ntrios: # each is a dict
  283. row = copy.copy(cfake5) # get first fields
  284. row = map(str,row)
  285. if race == "YRI":
  286. row += yfake5
  287. elif race == 'CEU':
  288. row += cfake5
  289. else:
  290. row += ['NA' for x in range(5)] # 5 dummy fields = center protLSID assayLSID panelLSID QCcode
  291. row += [''.join(sorted(line[x])) for x in sampids] # the genotypes in header (sorted) sample id order
  292. res.append(' '.join(row))
  293. res.append('')
  294. outfname = '%s_%s_%s_%dkb.geno' % (gene,probeid,race,2*flank/1000)
  295. f = file(outfname,'w')
  296. f.write('\n'.join(res))
  297. f.close()
  298. print '### %s: Wrote %d lines to %s' % (timenow(), len(res),outfname)
  299. def makePed(fprefix= 'fakebigped', fpath='./',
  300. width=500000, nsubj=2000, MAFdistribution=[],alleles={},
  301. freqs={}, fbatstyle=True, mendrate = 0.0, missrate = 0.03, missval='0',fbathead=''):
  302. """fake trios with mendel consistent random mating genotypes in offspring
  303. with consistent alleles and MAFs for the sample"""
  304. res = []
  305. if fbatstyle: # add a header row with the marker names
  306. res.append(fbathead) # header row for fbat
  307. outfname = '%s.ped'% (fprefix)
  308. outfname = os.path.join(fpath,outfname)
  309. outf = open(outfname,'w')
  310. ntrios = int(nsubj/3.)
  311. outf = open(outfile, 'w')
  312. started = time.time()
  313. for trio in xrange(ntrios):
  314. family = makeFam(width=width, freqs=freqs, alleles=alleles, trio=trio,
  315. missrate = missrate, mendrate=mendrate, missval=missval)
  316. res += family
  317. if (trio + 1) % 10 == 0: # write out to keep ram requirements reasonable
  318. if (trio + 1) % 50 == 0: # show progress
  319. dur = time.time() - started
  320. if dur == 0:
  321. dur = 1.0
  322. print 'Trio: %d, %4.1f genos/sec at %6.1f sec' % (trio + 1, width*trio*3/dur, dur)
  323. outf.write('\n'.join(res))
  324. outf.write('\n')
  325. res = []
  326. if len(res) > 0: # some left
  327. outf.write('\n'.join(res))
  328. outf.write('\n')
  329. outf.close()
  330. if debug:
  331. print '##makeped : %6.1f seconds total runtime' % (time.time() - started)
  332. def makeIndep(fprefix = 'fakebigped', fpath='./',
  333. width=500000, Nunaff=1000, Naff=1000, MAFdistribution=[],
  334. alleles={}, freqs={}, fbatstyle=True, missrate = 0.03, missval='0',fbathead=''):
  335. """fake a random sample from a random mating sample
  336. with consistent alleles and MAFs"""
  337. res = []
  338. Ntot = Nunaff + Naff
  339. status = [1,]*Nunaff
  340. status += [2,]*Nunaff
  341. outf = '%s.ped' % (fprefix)
  342. outf = os.path.join(fpath,outf)
  343. outf = open(outf, 'w')
  344. started = time.time()
  345. #sample = personMaker(width=width, affs=status, freqs=freqs, alleles=alleles, Ntomake=Ntot)
  346. if fbatstyle: # add a header row with the marker names
  347. res.append(fbathead) # header row for fbat
  348. for id in xrange(Ntot):
  349. if id < Nunaff:
  350. aff = 1
  351. else:
  352. aff = 2
  353. family = makePerson(width=width, aff=aff, freqs=freqs, alleles=alleles, id=id+1)
  354. res += family
  355. if (id % 50 == 0): # write out to keep ram requirements reasonable
  356. if (id % 200 == 0): # show progress
  357. dur = time.time() - started
  358. if dur == 0:
  359. dur = 1.0
  360. print 'Id: %d, %4.1f genos/sec at %6.1f sec' % (id, width*id/dur, dur)
  361. outf.write('\n'.join(res))
  362. outf.write('\n')
  363. res = []
  364. if len(res) > 0: # some left
  365. outf.write('\n'.join(res))
  366. outf.write('\n')
  367. outf.close()
  368. print '## makeindep: %6.1f seconds total runtime' % (time.time() - started)
  369. u = """
  370. Generate either trios or independent subjects with a prespecified
  371. number of random alleles and a uniform or triangular MAF distribution for
  372. stress testing. No LD is simulated - alleles are random. Offspring for
  373. trios are generated by random mating the random parental alleles so there are
  374. no Mendelian errors unless the -M option is used. Mendelian errors are generated
  375. randomly according to the possible errors given the parental mating type although
  376. this is fresh code and not guaranteed to work quite right yet - comments welcomed
  377. Enquiries to ross.lazarus@gmail.com
  378. eg to generate 700 trios with 500k snps, use:
  379. fakebigped.py -n 2100 -s 500000
  380. or to generate 500 independent cases and 500 controls with 100k snps and 0.02 missingness (MCAR), use:
  381. fakebigped.py -c 500 -n 1000 -s 100000 -m 0.02
  382. fakebigped.py -o myfake -m 0.05 -s 100000 -n 2000
  383. will make fbat compatible myfake.ped with 100k markers in
  384. 666 trios (total close to 2000 subjects), a uniform MAF distribution and about 5% MCAR missing
  385. fakebigped.py -o myfake -m 0.05 -s 100000 -n 2000 -M 0.05
  386. will make fbat compatible myfake.ped with 100k markers in
  387. 666 trios (total close to 2000 subjects), a uniform MAF distribution,
  388. about 5% Mendelian errors and about 5% MCAR missing
  389. fakebigped.py -o myfakecc -m 0.05 -s 100000 -n 2000 -c 1000 -l
  390. will make plink compatible myfakecc.ped and myfakecc.map (that's what the -l option does),
  391. with 100k markers in 1000 cases and 1000 controls (affection status 2 and 1 respectively),
  392. a triangular MAF distribution (more rare alleles) and about 5% MCAR missing
  393. You should see about 1/4 million genotypes/second so about an hour for a
  394. 500k snps in 2k subjects and about a 4GB ped file - these are BIG!!
  395. """
  396. import sys, os, glob
  397. galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
  398. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  399. <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  400. <head>
  401. <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  402. <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
  403. <title></title>
  404. <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
  405. </head>
  406. <body>
  407. <div class="document">
  408. """
  409. def doImport(outfile=None,outpath=None):
  410. """ import into one of the new html composite data types for Rgenetics
  411. Dan Blankenberg with mods by Ross Lazarus
  412. October 2007
  413. """
  414. flist = glob.glob(os.path.join(outpath,'*'))
  415. outf = open(outfile,'w')
  416. outf.write(galhtmlprefix % prog)
  417. for i, data in enumerate( flist ):
  418. outf.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1]))
  419. outf.write('<br><h3>This is simulated null genotype data generated by Rgenetics!</h3>')
  420. outf.write('%s called with command line:<br><pre>' % prog)
  421. outf.write(' '.join(sys.argv))
  422. outf.write('\n</pre>\n')
  423. outf.write("</div></body></html>")
  424. outf.close()
  425. if __name__ == "__main__":
  426. """
  427. """
  428. parser = OptionParser(usage=u, version="%prog 0.01")
  429. a = parser.add_option
  430. a("-n","--nsubjects",type="int",dest="Ntot",
  431. help="nsubj: total number of subjects",default=2000)
  432. a("-t","--title",dest="title",
  433. help="title: file basename for outputs",default='fakeped')
  434. a("-c","--cases",type="int",dest="Naff",
  435. help="number of cases: independent subjects with status set to 2 (ie cases). If not set, NTOT/3 trios will be generated", default = 0)
  436. a("-s","--snps",dest="width",type="int",
  437. help="snps: total number of snps per subject", default=1000)
  438. a("-d","--distribution",dest="MAFdist",default="Uniform",
  439. help="MAF distribution - default is Uniform, can be Triangular")
  440. a("-o","--outf",dest="outf",
  441. help="Output file", default = 'fakeped')
  442. a("-p","--outpath",dest="outpath",
  443. help="Path for output files", default = './')
  444. a("-l","--pLink",dest="outstyle", default='L',
  445. help="Ped files as for Plink - no header, separate Map file - default is Plink style")
  446. a("-w","--loWmaf", type="float", dest="lowmaf", default=0.01, help="Lower limit for SNP MAF (minor allele freq)")
  447. a("-m","--missing",dest="missrate",type="float",
  448. help="missing: probability of missing MCAR - default 0.0", default=0.0)
  449. a("-v","--valmiss",dest="missval",
  450. help="missing character: Missing allele code - usually 0 or N - default 0", default="0")
  451. a("-M","--Mendelrate",dest="mendrate",type="float",
  452. help="Mendelian error rate: probability of a mendel error per trio, default=0.0", default=0.0)
  453. a("-H","--noHGRS",dest="useHG",type="int",
  454. help="Use local copy of UCSC snp126 database to generate real rs numbers", default=True)
  455. (options,args) = parser.parse_args()
  456. low = options.lowmaf
  457. try:
  458. os.makedirs(options.outpath)
  459. except:
  460. pass
  461. if options.MAFdist.upper() == 'U':
  462. mafDist = makeUniformMAFdist(low=low, high=0.5)
  463. else:
  464. mafDist = makeTriangularMAFdist(low=low, high=0.5, beta=5)
  465. alleles,freqs, rslist, chromlist, poslist = makeMap(width=int(options.width),
  466. MAFdistribution=mafDist, useGP=False)
  467. fbathead = []
  468. s = string.whitespace+string.punctuation
  469. trantab = string.maketrans(s,'_'*len(s))
  470. title = string.translate(options.title,trantab)
  471. if options.outstyle == 'F':
  472. fbatstyle = True
  473. fbathead = makeFbathead(rslist=rslist, chromlist=chromlist, poslist=poslist, width=options.width)
  474. else:
  475. fbatstyle = False
  476. writeMap(fprefix=defbasename, rslist=rslist, fpath=options.outpath,
  477. chromlist=chromlist, poslist=poslist, width=options.width)
  478. if options.Naff > 0: # make case control data
  479. makeIndep(fprefix = defbasename, fpath=options.outpath,
  480. width=options.width, Nunaff=options.Ntot-options.Naff,
  481. Naff=options.Naff, MAFdistribution=mafDist,alleles=alleles, freqs=freqs,
  482. fbatstyle=fbatstyle, missrate=options.missrate, missval=options.missval,
  483. fbathead=fbathead)
  484. else:
  485. makePed(fprefix=defbasename, fpath=options.fpath,
  486. width=options.width, MAFdistribution=mafDist, nsubj=options.Ntot,
  487. alleles=alleles, freqs=freqs, fbatstyle=fbatstyle, missrate=options.missrate,
  488. mendrate=options.mendrate, missval=options.missval,
  489. fbathead=fbathead)
  490. doImport(outfile=options.outf,outpath=options.outpath)