/updatepreOTToLnewCoL.py

https://bitbucket.org/mtholder/ottol · Python · 434 lines · 354 code · 42 blank · 38 comment · 83 complexity · 6d739099000775544d6ba8dca60ae428 MD5 · raw file

  1. ##########################################################################################
  2. # Goal: build a file from the CoL database that contains only the taxonomy,
  3. # reshaping each lineage to mirror our view of the tree (e.g. changing 'Animalia'
  4. # to 'Eukaryota, Opisthokonta, Metazoa' and renaming all the 'Chromista' and 'Protozoa'
  5. # taxa).
  6. # Second step: extract the species that are not already in OTToL.
  7. #
  8. #
  9. ##########################################################################################
  10. import datetime
  11. import os, re
  12. numlist = []
  13. ##########################################################################################
  14. def findlastnum(filename, dbname):
  15. infile = open(filename,'r').readlines()
  16. for line in infile:
  17. db = line.split('\t')[2]
  18. if re.search(dbname,db):
  19. numlist.append(line.split('\t')[0])
  20. numlist.sort()
  21. #print numlist
  22. return int(numlist.pop()) + 1
  23. ##########################################################################################
  24. def parsetextfromCoL():
  25. infile = open('col/taxa.txt','r') #taxa.txt is file downloaded from CoL with all taxa
  26. outfile1 = open('acceptedNames','a')
  27. outfile2 = open('provisionallyAcceptedNames','a')
  28. count = 0
  29. pcount = 0
  30. for line in infile:
  31. if not re.search('infraspecies',line):
  32. if re.search('accepted',line) and re.search('species',line):
  33. if not re.search('provisionally',line):
  34. outfile1.write(line)
  35. count = count + 1
  36. else:
  37. outfile2.write(line)
  38. pcount = pcount + 1
  39. #print 'accepted names: '+ str(count)
  40. #print 'provisionally accepted names: '+ str(pcount)
  41. ##########################################################################################
  42. def renameCoL(f):
  43. outfile = open('taxa_' + f,'w')
  44. errorout = open('viruses_and_other_prblems','a')
  45. infile = open(f,'r').readlines()
  46. newtaxlist = []
  47. for line in infile:
  48. taxalist = line.split('\t\t')[2]
  49. #print taxalist
  50. sp = taxalist.split()[1].strip(',')
  51. if taxalist.split('\t')[1] == 'Archaea':
  52. newline = taxalist.split('\t')[1:]
  53. elif taxalist.split('\t')[1] == 'Bacteria':
  54. newline = taxalist.split('\t')[1:]
  55. elif taxalist.split('\t')[1] == 'Animalia':
  56. newline = re.sub('Animalia','Eukaryota\tOpisthokonta\tMetazoa',taxalist).split('\t')[1:]
  57. elif taxalist.split('\t')[1] == 'Fungi':
  58. newline = re.sub('Fungi','Eukaryota\tOpisthokonta\tFungi',taxalist).split('\t')[1:]
  59. elif taxalist.split('\t')[1] == 'Plantae':
  60. newline = re.sub('Plantae','Eukaryota\tPlantae',taxalist).split('\t')[1:]
  61. elif taxalist.split('\t')[1] == 'Protozoa':
  62. newline = renameProt(taxalist) #.split('\t')[1:]
  63. elif taxalist.split('\t')[1] == 'Chromista':
  64. newline = renameProt(taxalist) #.split('\t')[1:]
  65. else:
  66. if taxalist.split('\t')[1] == 'Viruses' or re.search('viruses',taxalist):
  67. newline = 'Virus'
  68. else:
  69. errorout = open('viruses_and_other_prblems','a')
  70. errorout.write('problem with 1 ' + line)
  71. errorout.close()
  72. newline = ''
  73. if newline != '' or newline != 'Virus':
  74. try:
  75. check(newline)
  76. except:
  77. print line, newline
  78. print 'problem with check(newline) ' + str(printlist)
  79. errorout = open('viruses_and_other_prblems','a')
  80. errorout.write('problem with check(newline) ' + str(printlist) + '\n')
  81. errorout.close()
  82. try:
  83. printlist = newline + [sp]
  84. try:
  85. assert re.search('[A-Z]',printlist[-2][0]) and re.search('[a-z]',printlist[-1][0])
  86. for item in printlist:
  87. if re.search(',',item):
  88. print str(printlist)
  89. errorout = open('viruses_and_other_prblems','a')
  90. errorout.write('problem with str(printlist) ' + str(printlist) + '\n')
  91. errorout.close()
  92. else:
  93. outfile.write(item + '\t')
  94. outfile.write('\n')
  95. except:
  96. errorout = open('viruses_and_other_prblems','a')
  97. errorout.write('problem with genera and species ' + str(line))
  98. errorout.close()
  99. except:
  100. errorout = open('viruses_and_other_prblems','a')
  101. errorout.write('problem with printlist ' + str(line))
  102. errorout.close()
  103. ##########################################################################################
  104. def renameProt(taxlist): #hash table built from our understanding of the tree
  105. myhash = {}
  106. taxalist = taxlist.split('\t')[1:] #get rid of preceding genus species
  107. myhash['Protozoa','Acritarcha','Not assigned'] = ['Eukaryota','EE','Acritarcha','Not assigned']
  108. myhash['Protozoa','Apicomplexa','Conoidasida'] = ['Eukaryota','SAR','Alveolata','Apicomplexa','Conoidasida']
  109. myhash['Protozoa','Apicomplexa','Not assigned'] = ['Eukaryota','SAR','Alveolata','Apicomplexa','Not assigned']
  110. myhash['Protozoa','Cercozoa','Chlorarachniophyceae'] = ['Eukaryota','SAR','Rhizaria','Cercozoa','Chlorarachniophyceae']
  111. myhash['Protozoa','Cercozoa','Phytomyxea'] = ['Eukaryota','SAR','Rhizaria','Cercozoa','Phytomyxea']
  112. myhash['Protozoa','Choanozoa','Mesomycetozoea'] = ['Eukaryota','Opisthokonta','Choanozoa','Mesomycetozoea']
  113. myhash['Protozoa','Choanozoa','Not assigned'] = ['Eukaryota','Opisthokonta','Choanozoa','Not assigned']
  114. myhash['Protozoa','Ciliophora','Ciliatea'] = ['Eukaryota','SAR','Alveolata','Ciliophora','Ciliatea']
  115. myhash['Protozoa','Dinophyta','Dinophyceae'] = ['Eukaryota','SAR','Alveolata','Dinophyta','Dinophyceae']
  116. myhash['Protozoa','Dinophyta','Ebriophyceae'] = ['Eukaryota','SAR','Alveolata','Dinophyta','Not assigned']
  117. myhash['Protozoa','Dinophyta','Not assigned'] = ['Eukaryota','Excavata','Euglenozoa','Euglenida']
  118. myhash['Protozoa','Euglenozoa','Euglenida'] = ['Eukaryota','Excavata','Euglenozoa','Euglenida']
  119. myhash['Protozoa','Euglenozoa','Trypanosomatidae'] = ['Eukaryota','Excavata','Euglenozoa','Trypanosomatidae']
  120. myhash['Protozoa','Flagellata','Not assigned'] = ['Eukaryota','Excavata','Flagellata','Not assigned']
  121. myhash['Protozoa','Mycetozoa','Acrasiomycetes'] = ['Eukaryota','Amoebozoa','Mycetozoa','Acrasiomycetes']
  122. myhash['Protozoa','Mycetozoa','Dictyosteliomycetes'] = ['Eukaryota','Amoebozoa','Mycetozoa','Dictyosteliomycetes']
  123. myhash['Protozoa','Mycetozoa','Myxomycetes'] = ['Eukaryota','Amoebozoa','Mycetozoa','Myxomycetes']
  124. myhash['Protozoa','Mycetozoa','Protosteliomycetes'] = ['Eukaryota','Amoebozoa','Mycetozoa','Protosteliomycetes']
  125. myhash['Protozoa','Mycetozoa','Not assigned'] = ['Eukaryota','Amoebozoa','Mycetozoa']
  126. myhash['Protozoa','Not assigned','Acantharia'] = ['Eukaryota','SAR','Rhizaria','Acantharia']
  127. myhash['Protozoa','Not assigned','Filosia'] = ['Eukaryota','SAR','Rhizaria','Filosia']
  128. myhash['Protozoa','Not assigned','Granuloreticulosea'] = ['Eukaryota','SAR','Rhizaria','Granuloreticulosea']
  129. myhash['Protozoa','Not assigned','Haplosporea'] = ['Eukaryota','SAR','Rhizaria','Haplosporea']
  130. myhash['Protozoa','Not assigned','Heliozoa','Actinophryida'] = ['Eukaryota','SAR','Stramenopile','Actinophryidae']
  131. myhash['Protozoa','Not assigned','Heliozoa','Centrohelida'] = ['Eukaryota','EE','Centrohelida']
  132. myhash['Protozoa','Not assigned','Heliozoa','Desmothothoracida'] = ['Eukaryota','SAR','Rhizaria','Desmothoracida']
  133. myhash['Protozoa','Not assigned','Heliozoa','Desmothoracida'] = ['Eukaryota','SAR','Rhizaria','Desmothoracida']
  134. myhash['Protozoa','Not assigned','Lobosa'] = ['Eukaryota','Amoebozoa','Lobosa']
  135. myhash['Protozoa','Not assigned','Not assigned','Jakobaceae'] = ['Eukaryota','Excavata','Jakobid']
  136. myhash['Protozoa','Not assigned','Sporozoa'] = ['Eukaryota','SAR','Alveolata','Apicomplexa']
  137. myhash['Protozoa','Parabasalia','Not assigned'] = ['Eukaryota','Excavata','Parabasalia']
  138. myhash['Protozoa','Percolozoa','Heterolobosea'] = ['Eukaryota','Excavata','Heterolobosea']
  139. myhash['Protozoa','Sarcomastigophora','Phytomastigophora'] = ['Eukaryota','SAR','Alveolata','Dinoflagellate']
  140. myhash['Protozoa','Sarcomastigophora','Zoomastigophora','Diplomonadida'] = ['Eukaryota','Excavata','Fornicata']
  141. myhash['Protozoa','Sarcomastigophora','Zoomastigophora','Trichomonadida'] = ['Eukaryota','Excavata','Parabasalia']
  142. myhash['Protozoa','Xenophyophora','Psamminida'] = ['Eukaryota','SAR','Rhizaria','Xenophyophora','Psamminida']
  143. myhash['Protozoa','Xenophyophora','Stannomida'] = ['Eukaryota','SAR','Rhizaria','Xenophyophora','Stannomida']
  144. myhash['Protozoa','Sarcomastigophora','Polycystina'] = ['Eukaryota','SAR','Rhizaria','Radiolaria']
  145. myhash['Protozoa','Myzozoa','Perkinsea'] = ['Eukaryota','EE','Perkinsus']
  146. myhash['Chromista','Cryptophyta','Cryptophyceae'] = ['Eukaryota','EE','Cryptophyta','Cryptophyceae']
  147. myhash['Chromista','Haptophyta','Not assigned'] = ['Eukaryota','EE','Haptophyta','Not assigned']
  148. myhash['Chromista','Haptophyta','Prymnesiophyceae'] = ['Eukaryota','EE','Haptophyta','Prymnesiophyceae']
  149. myhash['Chromista','Hyphochytriomycota','Hyphochytriomycetes'] = ['Eukaryota','SAR','Stramenopile','Hyphochytriomycetes']
  150. myhash['Chromista','Labyrinthista','Labyrinthulea'] = ['Eukaryota','SAR','Stramenopile','Labyrinthulea']
  151. myhash['Protozoa','Labyrinthista','Labyrinthulea'] = ['Eukaryota','SAR','Stramenopile','Labyrinthulea']
  152. myhash['Chromista','Ochrophyta','Bodonophyceae'] = ['Eukaryota','SAR','Stramenopile','Bodonophyceae']
  153. myhash['Chromista','Ochrophyta','Chrysophyceae'] = ['Eukaryota','SAR','Stramenopile','Chrysophyceae']
  154. myhash['Chromista','Ochrophyta','Coscinodiscophyceae'] = ['Eukaryota','SAR','Stramenopile','Coscinodiscophyceae']
  155. myhash['Chromista','Ochrophyta','Craspedophyceae'] = ['Eukaryota','SAR','Stramenopile','Craspedophyceae']
  156. myhash['Chromista','Ochrophyta','Dictyochophyceae'] = ['Eukaryota','SAR','Stramenopile','Dictyochophyceae']
  157. myhash['Chromista','Ochrophyta','Eustigmatophyceae'] = ['Eukaryota','SAR','Stramenopile','Eustigmatophyceae']
  158. myhash['Chromista','Ochrophyta','Fragilariophyceae'] = ['Eukaryota','SAR','Stramenopile','Fragilariophyceae']
  159. myhash['Chromista','Ochrophyta','Hexamitophyceae'] = ['Eukaryota','SAR','Stramenopile','Hexamitophyceae']
  160. myhash['Chromista','Ochrophyta','Phaeophyceae'] = ['Eukaryota','SAR','Stramenopile','Phaeophyceae']
  161. myhash['Chromista','Ochrophyta','Raphidophyceae'] = ['Eukaryota','SAR','Stramenopile','Raphidophyceae']
  162. myhash['Chromista','Ochrophyta','Synurophyceae'] = ['Eukaryota','SAR','Stramenopile','Synurophyceae']
  163. myhash['Chromista','Ochrophyta','Xanthophyceae'] = ['Eukaryota','SAR','Stramenopile','Xanthophyceae']
  164. myhash['Chromista','Oomycota','Not assigned'] = ['Eukaryota','SAR','Stramenopile','Not assigned']
  165. myhash['Chromista','Oomycota','Oomycetes'] = ['Eukaryota','SAR','Stramenopile','Oomycetes']
  166. myhash['Chromista','Sagenista','Bicosoecophyceae'] = ['Eukaryota','SAR','Stramenopile','Bicosoecophyceae']
  167. myhash['Chromista','Ochrophyta','Pelagophyceae'] = ['Eukaryota','SAR','Stramenopile','Pelagophyceae']
  168. myhash['Chromista','Ochrophyta','Bolidophyceae'] = ['Eukaryota','SAR','Stramenopile','Bolidophyceae']
  169. myhash['Chromista','Ochrophyta','Pinguiophyceae'] = ['Eukaryota','SAR','Stramenopile','Pinguiophyceae']
  170. myhash['Chromista','Not assigned','Schizocladiophyceae'] = ['Eukaryota','SAR','Stramenopile','Schizocladiophyceae']
  171. myhash['Chromista','Ochrophyta','Pinguiophyceae'] = ['Eukaryota','SAR','Stramenopile','Pinguiophyceae']
  172. myhash['Chromista','Not assigned','Developayella'] = ['Eukaryota','SAR','Stramenopile','Developayella']
  173. for key in myhash.keys():
  174. newtaxlist = myhash[key]
  175. keylist = key
  176. if all(x in taxalist for x in keylist):
  177. for y in keylist:
  178. taxalist.remove(y) #remove old list
  179. for item in taxalist:
  180. newtaxlist.append(item)
  181. #print newtaxlist
  182. return newtaxlist
  183. ##########################################################################################
  184. def mark_notinOTToL(OTToL):
  185. i = 0
  186. infile = open('allCoLrenamed_taxa','r').readlines()
  187. infile2 = open(OTToL,'r').readlines()
  188. outfile = open('new_sp_2add2OTToL','a')
  189. taxonDict = {}
  190. for line in infile2:
  191. taxid = line.split('\t')[0]
  192. parid = line.split('\t')[1]
  193. taxon = line.split('\t')[3]
  194. taxonDict[taxon] = line
  195. for line in infile:
  196. try:
  197. assert re.search('[A-Z]',line.split()[-2][0]) and re.search('[a-z]',line.split()[-1][0])
  198. gensp = line.split()[-2] + ' ' + line.split()[-1]
  199. except:
  200. outfile2 = open('error_entering','a')
  201. outfile2.write(line)
  202. outfile2.close()
  203. gensp = ''
  204. try:
  205. outfile2 = open('already_in','a')
  206. outfile2.write(taxonDict[gensp])
  207. outfile2.close()
  208. except:
  209. if gensp != '':
  210. outfile.write(line)
  211. outfile.close()
  212. ##########################################################################################
  213. def check(line):
  214. #error = open('errorlog','a')
  215. #infile = open('taxa_' + f,'r')
  216. #for line in infile:
  217. if line == 'Virus':
  218. return
  219. if line[0] not in ['Eukaryota','Bacteria','Archaea']:
  220. error = open('errorlog','a')
  221. error.write(line)
  222. error.close()
  223. if line[0] == 'Eukaryota':
  224. if line[1] not in ['Opisthokonta','Plantae','SAR','Amoebozoa','Excavata','EE']:
  225. error = open('errorlog','a')
  226. error.write(line)
  227. error.close()
  228. if line[0] == 'Bacteria':
  229. if line[1] not in ['Actinobacteria','Cyanobacteria','Acidobacteria','Aquificae','Bacteroidetes','Chlamydiae','Chlorobi','Chloroflexi','Chrysiogenetes','Deferribacteres','Deinococcus-thermus','Dictyoglomi','Fibrobacteres','Firmicutes','Fusobacteria','Gemmatimonadetes','Lentisphaerae','Nitrospira','Planctomycetes','Proteobacteria','Spirochaetes','Thermodesulfobacteria','Thermomicrobia','Thermotogae','Verrucomicrobia','Flavobacteria','Sphingobacteria','Ochrophyta','Deinococci','Bacilli','Clostridia','Mollicutes','Bacteria','Alphaproteobacteria','Betaproteobacteria','Deltaproteobacteria','Epsilonproteobacteria','Gammaproteobacteria','Verrucomicrobiae']:
  230. error = open('errorlog','a')
  231. error.write(line)
  232. error.close()
  233. if line[0] == 'Archaea':
  234. if line[1] not in ['Crenarchaeota','Euryarchaeota']:
  235. error = open('errorlog','a')
  236. error.write(line)
  237. error.close()
  238. ##########################################################################################
  239. def cleanup(toKEEP):
  240. for f in os.listdir(os.curdir):
  241. if f not in toKEEP:
  242. os.system('mv ' + f + ' col_extra_files...check_and_discard')
  243. ##########################################################################################
  244. def mergesp(date, OTToL,file,newtxid):
  245. infile1 = open(OTToL,'r').readlines()
  246. infile2 = open(file,'r').readlines()
  247. outfile = open('to_merge','w')
  248. outfile2 = open('genera_2add2OTToL','a')
  249. genusDict = {}
  250. for line in infile1:
  251. #try:
  252. if line.split('\t')[-2] == 'genus' or line.split('\t')[-2] == 'gen.':
  253. genus = line.split('\t')[3].split()[0]
  254. txid = line.split('\t')[0]
  255. genusDict[genus] = txid
  256. #except:
  257. # print line
  258. for line in infile2:
  259. if re.search('Not assigned',line):
  260. unassignedout = open('Not_assigned','a')
  261. unassignedout.write(line)
  262. unassignedout.close()
  263. else:
  264. genus2add = line.split()[-2]
  265. species2add = line.split()[-1]
  266. if genus2add in genusDict.keys():
  267. newparid = genusDict[genus2add]
  268. newtxid = newtxid + 1
  269. today = datetime.datetime.now().strftime('%m_%d_%y')
  270. newline = str(newtxid) + '\t' + str(newparid) + '\tCoL_' + date + '\t' + genus2add + ' ' + species2add + '\t\tspecies\t' + today + '\n'
  271. outfile.write(newline)
  272. else:
  273. outfile2.write(line)
  274. outfile.close()
  275. outfile2.close()
  276. return newtxid
  277. ##########################################################################################
  278. def mergegen(date, OTToL,file,newtxid):
  279. infile1 = open(OTToL,'r').readlines()
  280. infile2 = open(file,'r').readlines()
  281. outfile = open('to_merge','a')
  282. outfile2 = open('stillNotinOTToL','a')
  283. familyDict = {}
  284. addedList= []
  285. for line in infile1:
  286. #try:
  287. if line.split('\t')[-2] != 'species' and line.split('\t')[-2] != 'subspecies' and line.split('\t')[-2] != 'genus':
  288. try:
  289. family = line.split('\t')[3].split()[0]
  290. except:
  291. family = line.split('\t')[3]
  292. txid = line.split('\t')[0]
  293. familyDict[family] = txid
  294. #except:
  295. # print line
  296. for line in infile2:
  297. genus2add = line.split()[-2]
  298. fam2add = line.split()[-3]
  299. if genus2add.strip() not in addedList:
  300. addedList.append(genus2add.strip())
  301. if fam2add in familyDict.keys():
  302. newparid = familyDict[fam2add]
  303. newtxid = newtxid + 1
  304. today = datetime.datetime.now().strftime('%m_%d_%y')
  305. newline = str(newtxid) + '\t' + str(newparid) + '\tCoL_' + date + '\t' + genus2add + '\t\tgenus\t' + today + '\n'
  306. outfile.write(newline)
  307. else:
  308. outfile2.write(line)
  309. outfile.close()
  310. outfile2.close()
  311. add(OTToL,'to_merge')
  312. os.system('cat ' + OTToL + ' toAdd_' + OTToL + '+to_merge > ' + OTToL + 'genadded') #FIXED THIS TO LOOK FOR DUPLICATES/HOMONYMS
  313. os.system('cp to_merge to_merge_1')
  314. os.system('mv ' + file + ' ' + file + '_1')
  315. mergesp(date, OTToL + 'genadded',file + '_1',newtxid+1 )
  316. ##########################################################################################
  317. def add(file,file2):
  318. infile1 = open(file,'r').readlines()
  319. infile2 = open(file2,'r').readlines()
  320. outfile = open('toAdd_' + file + '+' + file2,'w')
  321. outfile2 = open('dups_2check_before_adding2_' + file + '+' + file2,'w')
  322. outfile3 = open('homonyms_2check_before_adding2_' + file + '+' + file2,'w')
  323. taxlist = []
  324. homlist = []
  325. taxidlist = []
  326. taxlist2 = []
  327. for line in infile1:
  328. tax = line.split('\t')[3]
  329. taxid = line.split('\t')[0]
  330. db = line.split('\t')[2]
  331. taxlist.append(tax)
  332. taxidlist.append(taxid)
  333. if re.search('_hom',db):
  334. homlist.append(tax)
  335. #print 'taxlist made'
  336. for line in infile2:
  337. tax = line.split('\t')[3]
  338. taxid = line.split('\t')[0]
  339. parid = line.split('\t')[1]
  340. if tax not in taxlist2:
  341. taxlist2.append(tax)
  342. if tax not in taxlist:
  343. if checkID(taxid,parid,taxidlist) == True:
  344. outfile.write(line)
  345. elif checkID(taxid,parid,taxidlist) == 'parent':
  346. errorlog = open('adderrorlog','a')
  347. errorlog.write('No Parent: ' + line)
  348. errorlog.close()
  349. elif checkID(taxid,parid,taxidlist) == 'taxon':
  350. errorlog = open('adderrorlog','a')
  351. errorlog.write('TaxID exists: ' + line)
  352. errorlog.close()
  353. else:
  354. if tax in homlist:
  355. outfile3.write(line)
  356. else:
  357. outfile2.write(line)
  358. else:
  359. errorlog = open('adderrorlog','a')
  360. errorlog.write('Trying to add twice: ' + line)
  361. errorlog.close()
  362. def checkID(taxid,parid,taxidlist):
  363. if taxid in taxidlist:
  364. return 'taxon'
  365. elif parid not in taxidlist:
  366. return 'parent'
  367. else:
  368. return True
  369. ##########################################################################################
  370. def main():
  371. # OTToL = raw_input('What is the latest OTToL? ')
  372. # date = raw_input('What is the download date for the latest OTToL? ')
  373. date = 'today'
  374. OTToL = 'OTToL080912v2'
  375. parsetextfromCoL()
  376. for f in ['acceptedNames','provisionallyAcceptedNames']:
  377. renameCoL(f)
  378. check(f)
  379. os.system('cat taxa_provisionallyAcceptedNames taxa_acceptedNames > allCoLrenamed_taxa')
  380. mark_notinOTToL(OTToL)
  381. os.system('mkdir col_extra_files...check_and_discard')
  382. toKEEP = [OTToL,'new_sp_2add2OTToL','col','col_extra_files...check_and_discard','updateOTToLnewCoL.py']
  383. cleanup(toKEEP)
  384. ##############################
  385. nextnum = findlastnum(OTToL, 'CoL')
  386. nextnum = mergesp(date, OTToL,'new_sp_2add2OTToL',nextnum)
  387. mergegen(date, OTToL,'genera_2add2OTToL',nextnum)
  388. add(OTToL + 'genadded','to_merge')
  389. os.system('cat ' + OTToL + 'genadded toAdd_' + OTToL + 'genadded+to_merge > ' + OTToL + 'final')
  390. toKEEP = [OTToL + 'final','col','col_extra_files...check_and_discard','updateOTToLnewCoL.py','updateOTToLnewCoL_README']
  391. cleanup(toKEEP)
  392. main()