PageRenderTime 50ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/query.py

https://bitbucket.org/iraklis_k/samidb
Python | 543 lines | 482 code | 12 blank | 49 comment | 10 complexity | 87e518d054e545762842808d5f2b3a9e MD5 | raw file
Possible License(s): BSD-3-Clause
  1. """
  2. query.py
  3. --------
  4. Python module within samiDB.
  5. Description:
  6. Query codes for the SAMI Galaxy Survey data archive. The main code is queryMultiple(), which uses PyTables to execute a series of individual queries and combine their outcomes. The rest supports this code, which has now superseded the old queryMaster() code on which it was based.
  7. Development:
  8. 08.10.2013, Iraklis Konstantopoulos (iraklis@aao.gov.au). Contributed codes acknowledged in the corresponding documentation strings.
  9. Dependencies:
  10. tables, utils, os, h5py, sys, numpy
  11. """
  12. import tables
  13. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  14. def print_sami(s, idfile, queryText, outFile=True, verbose=True):
  15. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  16. """ Define a Row Iterator for screen output.
  17. This bit that executes a PyTables query. All the coordination for this is done in queryMultiple() or queryMaster(), but the row iterator actually does the heavy lifting, one row (s) at a time. It can also output the ID list returned from the query code as an ascii file.
  18. Args:
  19. s [str]: An individual row of the table being queried.
  20. idfile [str]: Filename for output ID list (ascii).
  21. queryText [str]: Query being executed for screen output.
  22. Keyword args:
  23. outFile [boo]: File output toggle.
  24. verbose [boo]: Toggles diagnostic and declarative verbosity.
  25. """
  26. # Prepare some variables.
  27. counter = 0
  28. idlist = []
  29. # Iterate over all supplied rows.
  30. if outFile:
  31. f = open(idfile, 'w')
  32. for tables.row in s:
  33. name, z = tables.row['CATID'], tables.row['z_spec']
  34. if verbose:
  35. print(" Found SAMI galaxy %s at redshift z=%g" % (name, z))
  36. counter += 1
  37. idlist.append(name)
  38. if outFile:
  39. f.write(str(name)+'\n')
  40. if verbose:
  41. print("\n Found "+str(counter)+" galaxies satisfying query:\n "+
  42. queryText)
  43. if outFile:
  44. f.close()
  45. return(idlist)
  46. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  47. def makeTable(table, tabIndex, quickLookDir):
  48. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  49. """ Generate an html table for website output.
  50. Outputs an html table containing the rows satisfying a query (or list of queries). The quickLookDir arg will be phased out as soon as the single-object viewer can be generated on-the-fly, although it will remain in use by the flat backup filesystem.
  51. Args:
  52. table [str]: The name of the table being parsed (within h5 block).
  53. tabIndex [str]: Index array for the rows of a table to be display.
  54. quickLookDir [str]: URL where quicklook PDFs can be foind on server.
  55. """
  56. print('TEST')
  57. print(tabIndex)
  58. print('TEST')
  59. # Write html preamble.
  60. htmlTab = '<html><body><table>'
  61. # Write Row 0, column headings.
  62. htmlTab = htmlTab+\
  63. "<tr>"+\
  64. "<td>Quicklook</td>"+\
  65. "".join(["<td>"+str(s)+"</td>" for s in table.colnames])+\
  66. "</tr>"
  67. # Populate the table.
  68. for tables.row in table[tabIndex]:
  69. hlink = "<td><a href='"+quickLookDir+str(tables.row[0])+".pdf'>" +\
  70. "View</a></td>"
  71. try:
  72. htmlTab = htmlTab+\
  73. "<tr>" + hlink+\
  74. "".join(["<td>"+str(s)+"</td>" for s in tables.row])+\
  75. "</tr>"
  76. except:
  77. print('Nah, mate [2]')
  78. # Wrap up html, return table.
  79. htmlTab = htmlTab +"</table></body></html>"
  80. return(htmlTab)
  81. """ This used to write an html file. Keeping old code here for now.
  82. # Open a file to write the table, write html preamble.
  83. f = open(tableOut, 'w')
  84. f.write('<html><body><table>')
  85. f.write("<tr>" +
  86. "<td>Quicklook</td>" +
  87. "".join(["<td>"+str(s)+"</td>" for s in table.colnames]) +
  88. "</tr>")
  89. # Where do the quick-look plots live?
  90. baseURL ='file:///Users/iraklis/Data/SAMI/datasheets/GAMA/'
  91. # Do the deed.
  92. for tables.row in table[tabIndex]:
  93. hlink = "<td><a href='"+baseURL+str(tables.row[0])+".pdf'>" +\
  94. "View</a></td>"
  95. f.write("<tr>" + hlink +
  96. "".join(["<td>"+str(s)+"</td>" for s in tables.row]) +
  97. "</tr>")
  98. # Wrap up html.
  99. f.write("</table></body></html>")
  100. f.close()
  101. """
  102. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  103. def queryMaster(h5file, queryIn, version='', idfile='sami_query.lis',
  104. verbose=False, returnID=False, tabulate=True,
  105. quickLookDir='./', overwrite=True):
  106. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  107. """ Read a SAMI master table and perform a query
  108. This code queries the SAMI target table. It is now obsolete, as the more advanced queryMultiple() code can query the target table in as little time. It has not, however, been deleted, as an application for it may arise when we deploy the SAMI server interface. See above for an explanation of the quickLookDir argument. The rest should be self-explanatory, but drop the maintainer a line if you don't think so. The output of all query codes is restricted to lists of SAMI identifiers (long integers). This way the lists can be read in by the export code and be modular. This also helps with keeping transactions between the Python code and the server as simple as possible.
  109. Args:
  110. h5file [str]: Name of SAMI archive file.
  111. queryIn [str]: Query text or name of file containing the query string.
  112. Keyword args:
  113. version [str]: Data version to query. Blank string for latest version.
  114. idfile [str]: Name of output ID list ascii file.
  115. returnID [boo]: Toggle ascii file output.
  116. tabulate [boo]: Toggle html table output (always true for server).
  117. quickLookDir [str]: URL where quicklook PDFs can be foind on server.
  118. overwrite [boo]: Clobber flag for 'idfile'.
  119. verbose [boo]: Toggles diagnostic and declarative verbosity.
  120. """
  121. import utils
  122. import os
  123. import h5py as h5
  124. # Interpret the 'query' argument (look for a filename).
  125. if os.path.isfile(queryIn):
  126. fq = open(queryIn, 'r')
  127. queryText = fq.readlines()[0]
  128. else:
  129. queryText = queryIn
  130. # Check that the nominated h5 file exists.
  131. if not os.path.isfile(h5file):
  132. raise SystemExit("Cannot find nominated HDF5 file ('"+h5file+"').")
  133. # Get latest data version, if not supplied
  134. hdf0 = h5.File(h5file, 'r')
  135. version = utils.getVersion(h5file, hdf0, version)
  136. hdf0.close()
  137. # Open-read h5file.
  138. hdf = tables.openFile(h5file, 'r')
  139. # Optionally open an ascii file to write IDs returned by the query.
  140. if returnID:
  141. # Check if the file exists, check overwrite flag:
  142. if os.path.isfile(idfile):
  143. if not overwrite:
  144. hdf.close()
  145. raise SystemExit("The nominated output file ('"+idfile+"') "+
  146. "already exists. Please raise the 'overwrite'"+
  147. " flag or enter a different filename. ")
  148. # Identify the SAMI master table, assumed to live in the Table directory
  149. g_table = hdf.getNode('/SAMI/'+version+'/Table/')
  150. master = g_table.SAMI_MASTER
  151. # Run the row iterator.
  152. try:
  153. idlist = print_sami(master.where(queryText), idfile,
  154. queryText, outFile=returnID, verbose=verbose)
  155. except:
  156. hdf.close()
  157. raise SystemExit("Oops! Your query was not understood. Please "+
  158. "check the spelling of the chosen variable.")
  159. # Generate a table, if requested, given a row index and the table.
  160. if tabulate:
  161. tabIndex = [row.nrow for row in master.where(queryText)]
  162. madeTab = makeTable(master, tabIndex, quickLookDir)
  163. hdf.close()
  164. return(madeTab)
  165. # Otherwise close h5 file and return list of query results.
  166. if not tabulate:
  167. hdf.close()
  168. return(idlist)
  169. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  170. def queryMultiple(h5file, queryIn, writeFile=False, outFile='multipleQuery.lis',
  171. overwrite=False, tabulate=True, quickLookDir='./',
  172. verbose=True, version=''):
  173. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  174. """ Query multiple tables and combine results
  175. Very much like queryMaster(), in fact it is an extension of that code. So please have a look at the queryMaster() documentation string for some basic information on how SAMI queries are executed. This code runs a series of individual queries and then performs an intersection of the resultant ID lists. No effort has been made to immitate the speedup advantage offered by a proper table 'join', as this seems to be on the development list for PyTables proper. The code is not slow, however, and should satisfy even the most demanding queries.
  176. Args:
  177. h5file [str]: Name of SAMI archive file.
  178. queryIn [str]: Query text or name of file containing the query string.
  179. Keyword args:
  180. writeFile [boo]: Toggle ascii file output.
  181. outFile [str]: Name of output ID list ascii file.
  182. version [str]: Data version to query. Blank string for latest version.
  183. tabulate [boo]: Toggle html table output (always true for server).
  184. quickLookDir [str]: URL where quicklook PDFs can be foind on server.
  185. overwrite [boo]: Clobber flag for 'outFile'.
  186. verbose [boo]: Toggles diagnostic and declarative verbosity.
  187. """
  188. import utils
  189. import os
  190. import h5py as h5
  191. # Interpret the 'query' argument (look for a filename).
  192. if os.path.isfile(queryIn):
  193. queryInFile = True
  194. else:
  195. queryInFile = False
  196. # Check that the nominated h5 file exists.
  197. if not os.path.isfile(h5file):
  198. raise SystemExit("Cannot find nominated HDF5 file ('"+h5file+"').")
  199. # Is the query being read from a file?
  200. if queryInFile:
  201. # Open and read the query file line-per-line (even=table,odd=query).
  202. counter = 0
  203. tabs, queries = [], []
  204. with open(queryIn) as f:
  205. for i, l in enumerate(f):
  206. # Read the file in odd even lines
  207. if i%2==0:
  208. tabs.append(l.strip())
  209. else:
  210. queries.append(l.strip())
  211. counter += 1
  212. f.close()
  213. # Or is it being supplied as a multi-line string?
  214. else:
  215. # Split odd-even lines.
  216. tabs = queryIn.split('\n')[::2]
  217. queries = queryIn.split('\n')[1::2]
  218. # Get rid of any blank lines...
  219. while '' in tabs:
  220. tabs.remove('')
  221. while '' in queries:
  222. queries.remove('')
  223. # ...and any whitespace.
  224. tabs = [s.strip() for s in tabs]
  225. query = [s.strip() for s in queries]
  226. # Keep a counter of tables+queries for loops to come.
  227. counter = len(tabs) + len(queries)
  228. # Check if the length of the two lists is equal.
  229. if len(tabs) != len(queries):
  230. raise SystemExit("Table-query mismatch. Please input an equal "+
  231. "number of 'table' and 'query' lines. ")
  232. else:
  233. print("Read %g queries" % (counter/2))
  234. if verbose:
  235. for i in range(counter/2):
  236. print("In table '"+tabs[i]+"' query: "+queries[i])
  237. print("")
  238. # Get latest data version, if not supplied
  239. hdf0 = h5.File(h5file, 'r')
  240. version = utils.getVersion(h5file, hdf0, version)
  241. hdf0.close()
  242. # Now run all queries:
  243. # Open h5 file with PyTabs:
  244. hdf = tables.openFile(h5file, 'r')
  245. # Read all tables, append to list
  246. h5tabs = []
  247. for i in range(counter/2):
  248. try:
  249. h5tabs.append(hdf.getNode('/SAMI/'+version+'/Table/', tabs[i]))
  250. except:
  251. hdf.close()
  252. raise SystemExit("Oops! Your query was not understood. Please "+
  253. "check the spelling of table '"+tabs[i]+"'.")
  254. # OK, have the tables defined as variables, now need to query them.
  255. # Run each query:
  256. all_lists = []
  257. for i in range(counter/2):
  258. try:
  259. idlist = print_sami(h5tabs[i].where(queries[i]), outFile,
  260. queries[i], outFile=False, verbose=False)
  261. all_lists.append(idlist)
  262. except:
  263. hdf.close()
  264. raise SystemExit("Oops! Your query was not understood. Please "+
  265. "check the spelling of query '"+queries[i]+"'.")
  266. if verbose:
  267. print("Query "+str(i+1)+": Found "+str(len(idlist))+
  268. " galaxies satisfying "+queries[i])
  269. # Finally, intersect all idlists within all_lists container return, exit.
  270. final_idlist = set(all_lists[0]).intersection(*all_lists)
  271. if verbose:
  272. print("\n ------- Found "+str(len(final_idlist))+
  273. " galaxies satisfying all queries simultaneously.")
  274. if writeFile:
  275. # Check if the file exists.
  276. if (os.path.isfile(outFile)) and (not overwrite):
  277. print("\nFile already exists, please choose other filename or "+
  278. "raise 'overwrite' flag.")
  279. if (not os.path.isfile(outFile)) or overwrite:
  280. f = open(outFile, 'w')
  281. [f.write(s) for s in str(list(final_idlist))]
  282. f.close()
  283. if tabulate:
  284. # Identify the SAMI master table, assumed to live in the Table directory
  285. g_table = hdf.getNode('/SAMI/'+version+'/Table/')
  286. master = g_table.SAMI_MASTER
  287. # Iterate over Master, locate CATID in final_idlist.
  288. tabIndex = []
  289. for row in master:
  290. if row['CATID'] in final_idlist:
  291. tabIndex.append(row.nrow)
  292. madeTab = makeTable(master, tabIndex, quickLookDir)
  293. hdf.close()
  294. return(madeTab)
  295. else:
  296. hdf.close()
  297. return(final_idlist)
  298. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  299. def test_contents(h5file):
  300. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  301. """ Check if h5file contains cubes and RSS datasets
  302. This development code is largely obsolete now, but can morph into a quality assessment tool down the track.
  303. Args:
  304. h5file [str]: HDF5 archive to be scanned.
  305. """
  306. ### UNDER CONSTRUCTION ###
  307. import h5py as h5
  308. import sys
  309. f = h5.File(h5file)
  310. if 'SAMI' in f.keys():
  311. if ('Targets' in f['SAMI'].keys()):
  312. print('File is SAMI-formatted')
  313. targ_gr = f['SAMI/Targets']
  314. targets = targ_gr.keys()
  315. else: sys.exit('File is not SAMI-formatted')
  316. for i in range(len(targets)):
  317. # Check contents of each group:
  318. all_cube = ['Blue_cube_data', 'Blue_cube_variance',
  319. 'Blue_cube_weight', 'Red_cube_data',
  320. 'Red_cube_variance', 'Red_cube_weight']
  321. all_rss = ['Blue_RSS_data', 'Blue_RSS_variance',
  322. 'Red_RSS_data', 'Red_RSS_variance']
  323. if all_cube[:] in f['SAMI/Targets/'+targets[i]+'/v01'].keys():
  324. print(targets[i], 'is complete')
  325. else: sys.exit('File is not SAMI-formatted')
  326. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  327. def querycone(h5file, RAc, DECc, radius, version='', idfile='sami_query.lis',
  328. outFile=True, verbose=True, returnID=True, overwrite=True):
  329. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  330. """ Performs simple cone search, returns ID list.
  331. Written by Luca Cortese, based on the queryMaster() function. This should be self explanatory to astronomers, please contact the maintainer if you would like to see some clarifications inserted here.
  332. Args:
  333. h5file [str]: Name of SAMI archive file.
  334. RAc [flt]: Right ascension at centre of cone in decimal degrees.
  335. DECc [flt]: Declination at centre of cone in decimal degrees.
  336. radius [flt]: Radius of cone in decimal degrees.
  337. Keyword args:
  338. version [str]: Data version to query. Blank string for latest version.
  339. idfile [str]: Name of output ID list ascii file.
  340. outFile [boo]: Toggle ascii file output.
  341. returnID [boo]: Toggle ascii file output.
  342. overwrite [boo]: Clobber flag for 'outfile'.
  343. verbose [boo]: Toggles diagnostic and declarative verbosity.
  344. .. warning:: Discovered a redundancy between 'outFile' and 'returnID'. Also between the 'name' variable and 'idFile' argument. [14.04.2014]
  345. """
  346. import utils
  347. import os
  348. import h5py as h5
  349. # Get latest data version, if not supplied
  350. hdf0 = h5.File(h5file, 'r')
  351. version = utils.getVersion(h5file, hdf0, version)
  352. hdf0.close()
  353. # Open-read h5file.
  354. hdf = tables.openFile(h5file, 'r')
  355. # Optionally open an ascii file to write IDs returned by the query.
  356. if returnID:
  357. # Check if the file exists, check overwrite flag:
  358. if os.path.isfile(idfile):
  359. if not overwrite:
  360. hdf.close()
  361. raise SystemExit("The nominated output file ('"+idfile+"') "+
  362. "already exists. Please raise the 'overwrite'"+
  363. " flag or enter a different filename. ")
  364. # Identify the SAMI master table, assumed to live in the Table directory
  365. g_table = hdf.getNode('/SAMI/'+version+'/Table/')
  366. master = g_table.SAMI_MASTER
  367. # Prepare some variables.
  368. counter = 0
  369. idlist = []
  370. if outFile:
  371. f = open(idfile, 'w')
  372. for tables.row in master:
  373. ra, dec = tables.row['RA'], tables.row['Dec']
  374. dist=sph_dist(ra,dec,RAc,DECc)
  375. if (dist<radius):
  376. name, z = tables.row['CATID'],
  377. tables.row['z_spec']
  378. if verbose:
  379. print(" Found SAMI galaxy %s at redshift z=%g at a distance of %f degrees" % (name, z, dist))
  380. counter += 1
  381. idlist.append(name)
  382. if outFile:
  383. f.write(str(name)+'\n')
  384. print("\n Found "+str(counter)+" galaxies satisfying the cone search: RA=%f, DEC=%f, radius=%f \n" % (RAc, DECc, radius))
  385. if outFile:
  386. f.close()
  387. # Close h5 file
  388. hdf.close()
  389. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  390. def sph_dist(ra1, dec1,ra2, dec2):
  391. # ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  392. """ Spherical distance calculator.
  393. Written by Luca Cortese. Computes the spherical distance between 2 pairs of coordinates using the Haversine formula. This should be self-explanatory to astronomers, please contact the maintainer if you would like to see some clarifications inserted here.
  394. Args:
  395. ra1 [flt]: Right ascension of first coordinate pair in decimal degrees.
  396. ra2 [flt]: Right ascension of second coordinate pair in decimal degrees.
  397. dec2 [flt]: Declination of first coordinate pair in decimal degrees.
  398. dec1 [flt]: Declination of second coordinate pair in decimal degrees.
  399. """
  400. import numpy as np
  401. ra1_rad = np.radians(ra1)
  402. dec1_rad = np.radians(dec1)
  403. ra2_rad = np.radians(ra2)
  404. dec2_rad = np.radians(dec2)
  405. d = np.sin((dec1_rad-dec2_rad)/2)**2;
  406. d += np.sin((ra1_rad-ra2_rad)/2)**2 * np.cos(dec1_rad)*np.cos(dec2_rad)
  407. return np.degrees(2*np.arcsin(np.sqrt(d)))