PageRenderTime 50ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/BigQueryClient.cs

https://github.com/lucamartinetti/google-bigquery-client
C# | 563 lines | 439 code | 102 blank | 22 comment | 30 complexity | 218a9a217bbb5bf9a9b3ad338565cb64 MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Collections.Specialized;
  4. using System.Diagnostics;
  5. using System.IO;
  6. using System.Linq;
  7. using System.Net;
  8. using System.Text;
  9. using System.Threading;
  10. using System.Threading.Tasks;
  11. using Newtonsoft.Json;
  12. using Newtonsoft.Json.Linq;
  13. namespace Staq.Google.BigQuery
  14. {
  15. public class QueryResponse
  16. {
  17. public QueryResponse(string jobId)
  18. {
  19. //if (String.IsNullOrWhiteSpace(jobId)) throw new ArgumentNullException("jobId");
  20. JobId = jobId;
  21. JobComplete = false;
  22. }
  23. public QueryResponse(Field[] fields, Row[] rows, string jobId, long totalRows)
  24. {
  25. if (fields == null) throw new ArgumentNullException("fields");
  26. if (rows == null) throw new ArgumentNullException("rows");
  27. if (String.IsNullOrWhiteSpace(jobId)) throw new ArgumentNullException("jobId");
  28. Fields = fields;
  29. Rows = rows;
  30. JobId = jobId;
  31. TotalRows = totalRows;
  32. JobComplete = true;
  33. }
  34. public Field[] Fields { get; private set; }
  35. public Row[] Rows { get; private set; }
  36. public string JobId { get; private set; }
  37. public bool JobComplete { get; private set; }
  38. public long TotalRows { get; private set; }
  39. }
  40. public class Field
  41. {
  42. public Field(string name, string type, string mode, Field[] fields = null)
  43. {
  44. Name = name;
  45. Type = type;
  46. Mode = mode;
  47. Fields = fields;
  48. }
  49. public string Name { get; private set; }
  50. public string Type { get; private set; }
  51. public string Mode { get; private set; }
  52. public Field[] Fields { get; set; }
  53. }
  54. public class Row
  55. {
  56. public Row(object[] values)
  57. {
  58. Values = values;
  59. }
  60. public object[] Values { get; private set; }
  61. }
  62. public class BigQueryClient
  63. {
  64. static BigQueryClient()
  65. {
  66. ServicePointManager.DefaultConnectionLimit = int.MaxValue;
  67. ServicePointManager.UseNagleAlgorithm = false;
  68. ServicePointManager.Expect100Continue = false;
  69. }
  70. public static string GetCodeUrl(string clientId, string redirectUri = "urn:ietf:wg:oauth:2.0:oob", string scope = "https://www.googleapis.com/auth/bigquery")
  71. {
  72. return String.Format("https://accounts.google.com/o/oauth2/auth?response_type=code&client_id={0}&redirect_uri={1}&scope={2}", clientId, redirectUri, scope);
  73. }
  74. public static void GetRefreshToken(string code, string clientId, string clientSecret, out string accessToken, out string refreshToken, string redirectUri = "urn:ietf:wg:oauth:2.0:oob")
  75. {
  76. const string uri = @"https://accounts.google.com/o/oauth2/token";
  77. var client = new WebClient();
  78. var values = new NameValueCollection {{"client_id", clientId}, {"client_secret", clientSecret}, {"grant_type", "authorization_code"}, {"redirect_uri", redirectUri}, {"code", code}};
  79. Byte[] responseBytes = client.UploadValues(uri, values);
  80. string json = Encoding.UTF8.GetString(responseBytes);
  81. var ret = (JObject) JsonConvert.DeserializeObject(json);
  82. accessToken = (string) ret.GetValue("access_token");
  83. refreshToken = (string) ret.GetValue("refresh_token");
  84. }
  85. public static async Task<string> GetAccessTokenAsync(string clientId, string clientSecret, string refreshToken)
  86. {
  87. const string uri = @"https://accounts.google.com/o/oauth2/token";
  88. var client = new WebClient();
  89. var values = new NameValueCollection {{"client_id", clientId}, {"client_secret", clientSecret}, {"grant_type", "refresh_token"}, {"refresh_token", refreshToken}};
  90. Byte[] responseBytes = await client.UploadValuesTaskAsync(uri, values);
  91. string json = Encoding.UTF8.GetString(responseBytes);
  92. var ret = (JObject) JsonConvert.DeserializeObject(json);
  93. var accessToken = (string) ret.GetValue("access_token");
  94. return accessToken;
  95. }
  96. public static string[] ListDatasets(string projectId, string accessToken)
  97. {
  98. string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/datasets", projectId);
  99. var request = (HttpWebRequest) WebRequest.Create(uri);
  100. request.Method = "GET";
  101. request.Headers.Add("Authorization", "Bearer " + accessToken);
  102. string json;
  103. using (WebResponse response = request.GetResponse())
  104. {
  105. json = new StreamReader(response.GetResponseStream()).ReadToEnd();
  106. }
  107. var ret = JsonConvert.DeserializeObject<JObject>(json);
  108. return ret.Value<JArray>("datasets").Values<JObject>().Select(j => j.Value<string>("id").Split(':')[1]).ToArray();
  109. }
  110. public class Job
  111. {
  112. public Job(string jobId, string status, long totalBytesProcessed, DateTime startTime, DateTime endTime)
  113. {
  114. JobId = jobId;
  115. State = status;
  116. TotalBytesProcessed = totalBytesProcessed;
  117. StartTime = startTime;
  118. EndTime = endTime;
  119. }
  120. public string JobId { get; private set; }
  121. public string State { get; private set; }
  122. public long TotalBytesProcessed { get; private set; }
  123. public DateTime StartTime { get; private set; }
  124. public DateTime EndTime { get; private set; }
  125. public long TotalMilliseconds { get
  126. {
  127. return (long) (EndTime - StartTime).TotalMilliseconds;
  128. }}
  129. }
  130. public static Job[] ListJobs(string projectId, string accessToken)
  131. {
  132. string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/jobs", projectId);
  133. var request = (HttpWebRequest)WebRequest.Create(uri);
  134. request.Method = "GET";
  135. request.Headers.Add("Authorization", "Bearer " + accessToken);
  136. string json;
  137. using (WebResponse response = request.GetResponse())
  138. {
  139. json = new StreamReader(response.GetResponseStream()).ReadToEnd();
  140. }
  141. // "jobs": [
  142. //{
  143. // "id": "1025130940872:job_5838b5f8ae584dce8cb8b47834802d6b",
  144. // "kind": "bigquery#job",
  145. // "jobReference": {
  146. // "projectId": "1025130940872",
  147. // "jobId": "job_5838b5f8ae584dce8cb8b47834802d6b"
  148. // },
  149. // "state": "DONE",
  150. // "statistics": {
  151. // "startTime": "1355792636745",
  152. // "endTime": "1355792705816",
  153. // "totalBytesProcessed": "408367008",
  154. // "query": {
  155. // "totalBytesProcessed": "408367008"
  156. // }
  157. // },
  158. // "status": {
  159. // "state": "DONE"
  160. // }
  161. //},
  162. var ret = JsonConvert.DeserializeObject<JObject>(json);
  163. return ret.Value<JArray>("jobs").Values<JObject>().Select(j =>
  164. {
  165. var id = j.Value<string>("id").Split(':')[1];
  166. var state = j.Value<string>("state");
  167. var statistics = j.Value<JObject>("statistics");
  168. var totalBytesProcessed = statistics.Value<long>("totalBytesProcessed");
  169. var startTime = ParseTicks(statistics.Value<long>("startTime"));
  170. var endTime = ParseTicks(statistics.Value<long>("endTime"));
  171. return new Job(id, state, totalBytesProcessed,startTime,endTime);
  172. }).ToArray();
  173. }
  174. public static string[] ListTables(string projectId, string datasetId, string accessToken)
  175. {
  176. string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/datasets/{1}/tables", projectId, datasetId);
  177. var request = (HttpWebRequest) WebRequest.Create(uri);
  178. request.Method = "GET";
  179. request.Headers.Add("Authorization", "Bearer " + accessToken);
  180. string json;
  181. using (WebResponse response = request.GetResponse())
  182. {
  183. json = new StreamReader(response.GetResponseStream()).ReadToEnd();
  184. }
  185. var ret = JsonConvert.DeserializeObject<JObject>(json);
  186. return ret.Value<JArray>("tables").Values<JObject>().Select(j => j.Value<string>("id").Split(':')[1]).ToArray();
  187. }
  188. public static async Task<string> StartQueryAsync(string projectId, string datasetId, string query, string accessToken)
  189. {
  190. QueryResponse ret = await QueryAsync(projectId, datasetId, query, accessToken, 0, 0, false);
  191. return ret.JobId;
  192. }
  193. public static async Task<QueryResponse> QueryAsync(string projectId, string datasetId, string query, string accessToken, long timeoutMs, long maxResults = 1000, bool waitForCompletition = true, bool dryRun = false)
  194. {
  195. var doc = new JObject {
  196. {"kind", "bigquery#queryRequest"},
  197. {"query", query},
  198. {"maxResults", maxResults},
  199. {"defaultDataset", new JObject {{"datasetId", datasetId},{"projectId", projectId}}},
  200. {"timeoutMs", timeoutMs}
  201. };
  202. if (dryRun)
  203. doc.Add(new JProperty("dryrun",true));
  204. string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/queries", projectId);
  205. var request = (HttpWebRequest) WebRequest.Create(uri);
  206. request.Method = "POST";
  207. request.Headers.Add("Authorization", "Bearer " + accessToken);
  208. request.ContentType = "application/json";
  209. string requestJson = doc.ToString();
  210. byte[] buffer = Encoding.UTF8.GetBytes(requestJson);
  211. request.ContentLength = buffer.Length;
  212. request.GetRequestStream().Write(buffer, 0, buffer.Length);
  213. string responseJson;
  214. try
  215. {
  216. using (WebResponse response = await request.GetResponseAsync())
  217. {
  218. responseJson = new StreamReader(response.GetResponseStream()).ReadToEnd();
  219. }
  220. }
  221. catch (WebException webException)
  222. {
  223. HandleWebException(webException);
  224. throw;
  225. }
  226. var body = JsonConvert.DeserializeObject<JObject>(responseJson);
  227. var jobId = body.Value<JObject>("jobReference").Value<string>("jobId");
  228. var isComplete = body.Value<bool>("jobComplete");
  229. Debug.WriteLine("[{2}] QueryAsync\t{0}\t{1}", jobId, isComplete, Thread.CurrentThread.ManagedThreadId);
  230. long totalRows = 0;
  231. Field[] fields = null;
  232. Row[] rows = null;
  233. if (isComplete)
  234. {
  235. totalRows = body.Value<long>("totalRows");
  236. fields = ParseFields(body.Value<JObject>("schema").Value<JArray>("fields"));
  237. rows = ParseRowsFromResponse(body, fields);
  238. }
  239. else
  240. {
  241. if (!waitForCompletition)
  242. return new QueryResponse(jobId);
  243. return await GetQueryResponseAsync(projectId, jobId, accessToken, maxResults);
  244. }
  245. return new QueryResponse(fields, rows, jobId, totalRows);
  246. }
  247. private static void HandleWebException(WebException webException)
  248. {
  249. if (webException.Response != null)
  250. {
  251. string responseJson = new StreamReader(webException.Response.GetResponseStream()).ReadToEnd();
  252. var errorMessage = JsonConvert.DeserializeObject<JObject>(responseJson).Value<JObject>("error").Value<string>("message");
  253. throw new ArgumentException(errorMessage,webException);
  254. }
  255. throw webException;
  256. }
  257. public static async Task<QueryResponse> GetQueryResponseAsync(string projectId, string jobId, string accessToken, long pageSize = 1000)
  258. {
  259. int returnedRows = 0;
  260. long totalRows = 0;
  261. var ret = new List<Row>();
  262. Field[] fields = null;
  263. Row[] rows = null;
  264. while (true)
  265. {
  266. bool isComplete;
  267. do
  268. {
  269. QueryResponse response = await GetQueryResultAsync(projectId, jobId, accessToken, returnedRows, pageSize);
  270. isComplete = response.JobComplete;
  271. if (isComplete)
  272. {
  273. totalRows = response.TotalRows;
  274. fields = response.Fields;
  275. rows = response.Rows;
  276. }
  277. else
  278. {
  279. Thread.Sleep(100);
  280. }
  281. } while (!isComplete);
  282. returnedRows += rows.Length;
  283. ret.AddRange(rows);
  284. if (returnedRows == totalRows)
  285. break;
  286. }
  287. return new QueryResponse(fields, ret.ToArray(), jobId, totalRows);
  288. }
  289. private static Row[] ParseRowsFromResponse(JObject ret, Field[] fields)
  290. {
  291. var rows = ret.Value<JArray>("rows");
  292. if (rows == null)
  293. return new Row[0];
  294. return rows.Select(row => row.Value<JArray>("f")).Select(f =>
  295. {
  296. JValue[] values = f.Values<JValue>("v").ToArray();
  297. object[] parsedValues = values.Select((v, i) =>
  298. {
  299. string type = fields[i].Type;
  300. switch (type)
  301. {
  302. case "INTEGER":
  303. return (Object) ((int?) v);
  304. case "STRING":
  305. return (Object) ((string) v);
  306. case "FLOAT":
  307. return (Object) ((double?) v);
  308. case "BOOLEAN":
  309. return (Object) ((bool?) v);
  310. default:
  311. throw new NotImplementedException();
  312. }
  313. }).ToArray();
  314. return new Row(parsedValues);
  315. }).ToArray();
  316. }
  317. public static Field[] ParseFields(JArray a)
  318. {
  319. return a.Select(f =>
  320. {
  321. var fieldsArray = f.Value<JArray>("fields");
  322. if (fieldsArray == null)
  323. {
  324. return new Field(f.Value<string>("name"), f.Value<string>("type"), f.Value<string>("mode"));
  325. }
  326. else
  327. {
  328. return new Field(f.Value<string>("name"), f.Value<string>("type"), f.Value<string>("mode"), ParseFields(fieldsArray));
  329. }
  330. }).ToArray();
  331. }
  332. private static async Task<QueryResponse> GetQueryResultAsync(string projectId, string jobId, string accessToken, long startIndex = 0, long pageSize = 1000)
  333. {
  334. string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/queries/{1}?startIndex={2}&maxResults={3}", projectId, jobId, startIndex, pageSize);
  335. var request = (HttpWebRequest) WebRequest.Create(uri);
  336. request.Method = "GET";
  337. request.Headers.Add("Authorization", "Bearer " + accessToken);
  338. request.ContentType = "application/json";
  339. using (WebResponse response = await request.GetResponseAsync())
  340. {
  341. string responseJson = new StreamReader(response.GetResponseStream()).ReadToEnd();
  342. var ret = JsonConvert.DeserializeObject<JObject>(responseJson);
  343. var jobComplete = ret.Value<bool>("jobComplete");
  344. Debug.WriteLine("[{2}] GetQueryResultAsync\t{0}\t{1}", jobId, jobComplete, Thread.CurrentThread.ManagedThreadId);
  345. if (jobComplete)
  346. {
  347. var totalRows = ret.Value<long>("totalRows");
  348. Field[] fields = ParseFields(ret.Value<JObject>("schema").Value<JArray>("fields"));
  349. Row[] rows = ParseRowsFromResponse(ret, fields);
  350. return new QueryResponse(fields, rows, jobId, totalRows);
  351. }
  352. return new QueryResponse(jobId);
  353. }
  354. }
  355. public static async Task<string> GetJobStatusAsync(string projectId, string jobId, string accessToken)
  356. {
  357. string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/jobs/{1}", projectId, jobId);
  358. var request = (HttpWebRequest) WebRequest.Create(uri);
  359. request.Method = "GET";
  360. request.Headers.Add("Authorization", "Bearer " + accessToken);
  361. request.ContentType = "application/json";
  362. using (WebResponse response = await request.GetResponseAsync())
  363. {
  364. string responseJson = new StreamReader(response.GetResponseStream()).ReadToEnd();
  365. var body = JsonConvert.DeserializeObject<JObject>(responseJson);
  366. var status = body.Value<JObject>("status");
  367. var state = status.Value<string>("state");
  368. if (status.Value<JObject>("errorResult") != null)
  369. {
  370. var errorMessagge = status.Value<JObject>("errorResult").Value<string>("message");
  371. throw new Exception(errorMessagge);
  372. }
  373. return state;
  374. }
  375. }
  376. public static DateTime ParseTicks(long microseconds)
  377. {
  378. long value = microseconds;
  379. var ts = TimeSpan.FromMilliseconds(value);
  380. return new DateTime(1970, 1, 1, 0, 0, 0, 0, DateTimeKind.Utc).Add(ts).ToUniversalTime();
  381. }
  382. public static async Task<Job> GetJobAsync(string projectId, string jobId, string accessToken)
  383. {
  384. string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/jobs/{1}", projectId, jobId);
  385. var request = (HttpWebRequest)WebRequest.Create(uri);
  386. request.Method = "GET";
  387. request.Headers.Add("Authorization", "Bearer " + accessToken);
  388. request.ContentType = "application/json";
  389. using (WebResponse response = await request.GetResponseAsync())
  390. {
  391. string responseJson = new StreamReader(response.GetResponseStream()).ReadToEnd();
  392. var body = JsonConvert.DeserializeObject<JObject>(responseJson);
  393. var id = body.Value<string>("id").Split(':')[1];
  394. var state = body.Value<JObject>("status").Value<string>("state");
  395. var statistics = body.Value<JObject>("statistics");
  396. var totalBytesProcessed = statistics.Value<long>("totalBytesProcessed");
  397. var startTime = ParseTicks(statistics.Value<long>("startTime"));
  398. var endTime = ParseTicks(statistics.Value<long>("endTime"));
  399. return new Job(id, state, totalBytesProcessed, startTime, endTime);
  400. }
  401. }
  402. public static async Task<string> StartLoadJobAsync(string projectId, string sourceUri, string datasetId, string tableId, string accessToken, Field[] fields, long maxBadRecords = 0)
  403. {
  404. var uris = new JProperty("sourceUris", new JArray(sourceUri));
  405. var destinationTable = new JProperty("destinationTable", new JObject(new JProperty("projectId", projectId), new JProperty("datasetId", datasetId), new JProperty("tableId", tableId)));
  406. var allowQuotedLines = new JProperty("sourceFormat", "NEWLINE_DELIMITED_JSON");
  407. var badRecords = new JProperty("maxBadRecords", maxBadRecords);
  408. var configuration = new JObject {new JProperty("load", new JObject(uris, new JProperty("schema", new JObject(GetFields(fields))), destinationTable, allowQuotedLines, badRecords))};
  409. var doc = new JObject {{"projectId", projectId}, {"configuration", configuration}};
  410. string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/jobs", projectId);
  411. var request = (HttpWebRequest) WebRequest.Create(uri);
  412. request.Method = "POST";
  413. request.Headers.Add("Authorization", "Bearer " + accessToken);
  414. request.ContentType = "application/json";
  415. string requestJson = doc.ToString();
  416. byte[] buffer = Encoding.UTF8.GetBytes(requestJson);
  417. request.ContentLength = buffer.Length;
  418. request.GetRequestStream().Write(buffer, 0, buffer.Length);
  419. string responseJson;
  420. try
  421. {
  422. using (WebResponse response = await request.GetResponseAsync())
  423. {
  424. responseJson = new StreamReader(response.GetResponseStream()).ReadToEnd();
  425. var ret = JsonConvert.DeserializeObject<JObject>(responseJson);
  426. return ret.Value<JObject>("jobReference").Value<string>("jobId");
  427. }
  428. }
  429. catch (WebException ex)
  430. {
  431. if (ex.Response == null)
  432. {
  433. throw;
  434. }
  435. var status = (int) ((HttpWebResponse) ex.Response).StatusCode;
  436. if (status == 400)
  437. {
  438. responseJson = new StreamReader(ex.Response.GetResponseStream()).ReadToEnd();
  439. var errorMessage = JsonConvert.DeserializeObject<JObject>(responseJson).Value<JObject>("error").Value<string>("message");
  440. throw new Exception(errorMessage);
  441. }
  442. throw;
  443. }
  444. }
  445. public static JProperty GetFields(Field[] fields)
  446. {
  447. var jArray = new JArray(fields.Select(GetFieldSchema).ToArray());
  448. return new JProperty("fields", jArray);
  449. }
  450. private static JObject GetFieldSchema(Field f)
  451. {
  452. var ret = new JObject {{"name", f.Name}, {"type", f.Type}, {"mode", f.Mode}};
  453. if (f.Fields != null && f.Fields.Length > 0)
  454. {
  455. JProperty nestedFields = GetFields(f.Fields);
  456. ret.Add(nestedFields);
  457. }
  458. return ret;
  459. }
  460. }
  461. }