/BigQueryClient.cs
C# | 563 lines | 439 code | 102 blank | 22 comment | 30 complexity | 218a9a217bbb5bf9a9b3ad338565cb64 MD5 | raw file
- using System;
- using System.Collections.Generic;
- using System.Collections.Specialized;
- using System.Diagnostics;
- using System.IO;
- using System.Linq;
- using System.Net;
- using System.Text;
- using System.Threading;
- using System.Threading.Tasks;
- using Newtonsoft.Json;
- using Newtonsoft.Json.Linq;
- namespace Staq.Google.BigQuery
- {
- public class QueryResponse
- {
- public QueryResponse(string jobId)
- {
- //if (String.IsNullOrWhiteSpace(jobId)) throw new ArgumentNullException("jobId");
- JobId = jobId;
- JobComplete = false;
- }
- public QueryResponse(Field[] fields, Row[] rows, string jobId, long totalRows)
- {
- if (fields == null) throw new ArgumentNullException("fields");
- if (rows == null) throw new ArgumentNullException("rows");
- if (String.IsNullOrWhiteSpace(jobId)) throw new ArgumentNullException("jobId");
- Fields = fields;
- Rows = rows;
- JobId = jobId;
- TotalRows = totalRows;
- JobComplete = true;
- }
- public Field[] Fields { get; private set; }
- public Row[] Rows { get; private set; }
- public string JobId { get; private set; }
- public bool JobComplete { get; private set; }
- public long TotalRows { get; private set; }
- }
- public class Field
- {
- public Field(string name, string type, string mode, Field[] fields = null)
- {
- Name = name;
- Type = type;
- Mode = mode;
- Fields = fields;
- }
- public string Name { get; private set; }
- public string Type { get; private set; }
- public string Mode { get; private set; }
- public Field[] Fields { get; set; }
- }
- public class Row
- {
- public Row(object[] values)
- {
- Values = values;
- }
- public object[] Values { get; private set; }
- }
- public class BigQueryClient
- {
- static BigQueryClient()
- {
- ServicePointManager.DefaultConnectionLimit = int.MaxValue;
- ServicePointManager.UseNagleAlgorithm = false;
- ServicePointManager.Expect100Continue = false;
- }
- public static string GetCodeUrl(string clientId, string redirectUri = "urn:ietf:wg:oauth:2.0:oob", string scope = "https://www.googleapis.com/auth/bigquery")
- {
- return String.Format("https://accounts.google.com/o/oauth2/auth?response_type=code&client_id={0}&redirect_uri={1}&scope={2}", clientId, redirectUri, scope);
- }
- public static void GetRefreshToken(string code, string clientId, string clientSecret, out string accessToken, out string refreshToken, string redirectUri = "urn:ietf:wg:oauth:2.0:oob")
- {
- const string uri = @"https://accounts.google.com/o/oauth2/token";
- var client = new WebClient();
- var values = new NameValueCollection {{"client_id", clientId}, {"client_secret", clientSecret}, {"grant_type", "authorization_code"}, {"redirect_uri", redirectUri}, {"code", code}};
- Byte[] responseBytes = client.UploadValues(uri, values);
- string json = Encoding.UTF8.GetString(responseBytes);
- var ret = (JObject) JsonConvert.DeserializeObject(json);
- accessToken = (string) ret.GetValue("access_token");
- refreshToken = (string) ret.GetValue("refresh_token");
- }
- public static async Task<string> GetAccessTokenAsync(string clientId, string clientSecret, string refreshToken)
- {
- const string uri = @"https://accounts.google.com/o/oauth2/token";
- var client = new WebClient();
- var values = new NameValueCollection {{"client_id", clientId}, {"client_secret", clientSecret}, {"grant_type", "refresh_token"}, {"refresh_token", refreshToken}};
- Byte[] responseBytes = await client.UploadValuesTaskAsync(uri, values);
- string json = Encoding.UTF8.GetString(responseBytes);
- var ret = (JObject) JsonConvert.DeserializeObject(json);
- var accessToken = (string) ret.GetValue("access_token");
- return accessToken;
- }
- public static string[] ListDatasets(string projectId, string accessToken)
- {
- string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/datasets", projectId);
- var request = (HttpWebRequest) WebRequest.Create(uri);
- request.Method = "GET";
- request.Headers.Add("Authorization", "Bearer " + accessToken);
- string json;
- using (WebResponse response = request.GetResponse())
- {
- json = new StreamReader(response.GetResponseStream()).ReadToEnd();
- }
- var ret = JsonConvert.DeserializeObject<JObject>(json);
- return ret.Value<JArray>("datasets").Values<JObject>().Select(j => j.Value<string>("id").Split(':')[1]).ToArray();
- }
- public class Job
- {
- public Job(string jobId, string status, long totalBytesProcessed, DateTime startTime, DateTime endTime)
- {
- JobId = jobId;
- State = status;
- TotalBytesProcessed = totalBytesProcessed;
- StartTime = startTime;
- EndTime = endTime;
- }
- public string JobId { get; private set; }
- public string State { get; private set; }
- public long TotalBytesProcessed { get; private set; }
- public DateTime StartTime { get; private set; }
- public DateTime EndTime { get; private set; }
- public long TotalMilliseconds { get
- {
- return (long) (EndTime - StartTime).TotalMilliseconds;
- }}
- }
- public static Job[] ListJobs(string projectId, string accessToken)
- {
- string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/jobs", projectId);
- var request = (HttpWebRequest)WebRequest.Create(uri);
- request.Method = "GET";
- request.Headers.Add("Authorization", "Bearer " + accessToken);
- string json;
- using (WebResponse response = request.GetResponse())
- {
- json = new StreamReader(response.GetResponseStream()).ReadToEnd();
- }
- // "jobs": [
- //{
- // "id": "1025130940872:job_5838b5f8ae584dce8cb8b47834802d6b",
- // "kind": "bigquery#job",
- // "jobReference": {
- // "projectId": "1025130940872",
- // "jobId": "job_5838b5f8ae584dce8cb8b47834802d6b"
- // },
- // "state": "DONE",
- // "statistics": {
- // "startTime": "1355792636745",
- // "endTime": "1355792705816",
- // "totalBytesProcessed": "408367008",
- // "query": {
- // "totalBytesProcessed": "408367008"
- // }
- // },
- // "status": {
- // "state": "DONE"
- // }
- //},
- var ret = JsonConvert.DeserializeObject<JObject>(json);
- return ret.Value<JArray>("jobs").Values<JObject>().Select(j =>
- {
- var id = j.Value<string>("id").Split(':')[1];
- var state = j.Value<string>("state");
- var statistics = j.Value<JObject>("statistics");
- var totalBytesProcessed = statistics.Value<long>("totalBytesProcessed");
- var startTime = ParseTicks(statistics.Value<long>("startTime"));
- var endTime = ParseTicks(statistics.Value<long>("endTime"));
- return new Job(id, state, totalBytesProcessed,startTime,endTime);
- }).ToArray();
- }
- public static string[] ListTables(string projectId, string datasetId, string accessToken)
- {
- string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/datasets/{1}/tables", projectId, datasetId);
- var request = (HttpWebRequest) WebRequest.Create(uri);
- request.Method = "GET";
- request.Headers.Add("Authorization", "Bearer " + accessToken);
- string json;
- using (WebResponse response = request.GetResponse())
- {
- json = new StreamReader(response.GetResponseStream()).ReadToEnd();
- }
- var ret = JsonConvert.DeserializeObject<JObject>(json);
- return ret.Value<JArray>("tables").Values<JObject>().Select(j => j.Value<string>("id").Split(':')[1]).ToArray();
- }
- public static async Task<string> StartQueryAsync(string projectId, string datasetId, string query, string accessToken)
- {
- QueryResponse ret = await QueryAsync(projectId, datasetId, query, accessToken, 0, 0, false);
- return ret.JobId;
- }
- public static async Task<QueryResponse> QueryAsync(string projectId, string datasetId, string query, string accessToken, long timeoutMs, long maxResults = 1000, bool waitForCompletition = true, bool dryRun = false)
- {
- var doc = new JObject {
- {"kind", "bigquery#queryRequest"},
- {"query", query},
- {"maxResults", maxResults},
- {"defaultDataset", new JObject {{"datasetId", datasetId},{"projectId", projectId}}},
- {"timeoutMs", timeoutMs}
- };
- if (dryRun)
- doc.Add(new JProperty("dryrun",true));
- string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/queries", projectId);
- var request = (HttpWebRequest) WebRequest.Create(uri);
- request.Method = "POST";
- request.Headers.Add("Authorization", "Bearer " + accessToken);
- request.ContentType = "application/json";
- string requestJson = doc.ToString();
- byte[] buffer = Encoding.UTF8.GetBytes(requestJson);
- request.ContentLength = buffer.Length;
- request.GetRequestStream().Write(buffer, 0, buffer.Length);
- string responseJson;
- try
- {
- using (WebResponse response = await request.GetResponseAsync())
- {
- responseJson = new StreamReader(response.GetResponseStream()).ReadToEnd();
- }
- }
- catch (WebException webException)
- {
- HandleWebException(webException);
- throw;
- }
- var body = JsonConvert.DeserializeObject<JObject>(responseJson);
- var jobId = body.Value<JObject>("jobReference").Value<string>("jobId");
- var isComplete = body.Value<bool>("jobComplete");
- Debug.WriteLine("[{2}] QueryAsync\t{0}\t{1}", jobId, isComplete, Thread.CurrentThread.ManagedThreadId);
- long totalRows = 0;
- Field[] fields = null;
- Row[] rows = null;
- if (isComplete)
- {
- totalRows = body.Value<long>("totalRows");
- fields = ParseFields(body.Value<JObject>("schema").Value<JArray>("fields"));
- rows = ParseRowsFromResponse(body, fields);
- }
- else
- {
- if (!waitForCompletition)
- return new QueryResponse(jobId);
- return await GetQueryResponseAsync(projectId, jobId, accessToken, maxResults);
- }
- return new QueryResponse(fields, rows, jobId, totalRows);
- }
- private static void HandleWebException(WebException webException)
- {
- if (webException.Response != null)
- {
- string responseJson = new StreamReader(webException.Response.GetResponseStream()).ReadToEnd();
- var errorMessage = JsonConvert.DeserializeObject<JObject>(responseJson).Value<JObject>("error").Value<string>("message");
-
- throw new ArgumentException(errorMessage,webException);
- }
- throw webException;
- }
- public static async Task<QueryResponse> GetQueryResponseAsync(string projectId, string jobId, string accessToken, long pageSize = 1000)
- {
- int returnedRows = 0;
- long totalRows = 0;
- var ret = new List<Row>();
- Field[] fields = null;
- Row[] rows = null;
- while (true)
- {
- bool isComplete;
- do
- {
- QueryResponse response = await GetQueryResultAsync(projectId, jobId, accessToken, returnedRows, pageSize);
- isComplete = response.JobComplete;
- if (isComplete)
- {
- totalRows = response.TotalRows;
- fields = response.Fields;
- rows = response.Rows;
- }
- else
- {
- Thread.Sleep(100);
- }
- } while (!isComplete);
- returnedRows += rows.Length;
- ret.AddRange(rows);
- if (returnedRows == totalRows)
- break;
- }
- return new QueryResponse(fields, ret.ToArray(), jobId, totalRows);
- }
- private static Row[] ParseRowsFromResponse(JObject ret, Field[] fields)
- {
- var rows = ret.Value<JArray>("rows");
- if (rows == null)
- return new Row[0];
- return rows.Select(row => row.Value<JArray>("f")).Select(f =>
- {
- JValue[] values = f.Values<JValue>("v").ToArray();
- object[] parsedValues = values.Select((v, i) =>
- {
- string type = fields[i].Type;
- switch (type)
- {
- case "INTEGER":
- return (Object) ((int?) v);
- case "STRING":
- return (Object) ((string) v);
- case "FLOAT":
- return (Object) ((double?) v);
- case "BOOLEAN":
- return (Object) ((bool?) v);
- default:
- throw new NotImplementedException();
- }
- }).ToArray();
- return new Row(parsedValues);
- }).ToArray();
- }
- public static Field[] ParseFields(JArray a)
- {
- return a.Select(f =>
- {
- var fieldsArray = f.Value<JArray>("fields");
- if (fieldsArray == null)
- {
- return new Field(f.Value<string>("name"), f.Value<string>("type"), f.Value<string>("mode"));
- }
- else
- {
- return new Field(f.Value<string>("name"), f.Value<string>("type"), f.Value<string>("mode"), ParseFields(fieldsArray));
- }
- }).ToArray();
- }
- private static async Task<QueryResponse> GetQueryResultAsync(string projectId, string jobId, string accessToken, long startIndex = 0, long pageSize = 1000)
- {
- string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/queries/{1}?startIndex={2}&maxResults={3}", projectId, jobId, startIndex, pageSize);
- var request = (HttpWebRequest) WebRequest.Create(uri);
- request.Method = "GET";
- request.Headers.Add("Authorization", "Bearer " + accessToken);
- request.ContentType = "application/json";
- using (WebResponse response = await request.GetResponseAsync())
- {
- string responseJson = new StreamReader(response.GetResponseStream()).ReadToEnd();
- var ret = JsonConvert.DeserializeObject<JObject>(responseJson);
- var jobComplete = ret.Value<bool>("jobComplete");
- Debug.WriteLine("[{2}] GetQueryResultAsync\t{0}\t{1}", jobId, jobComplete, Thread.CurrentThread.ManagedThreadId);
- if (jobComplete)
- {
- var totalRows = ret.Value<long>("totalRows");
- Field[] fields = ParseFields(ret.Value<JObject>("schema").Value<JArray>("fields"));
- Row[] rows = ParseRowsFromResponse(ret, fields);
- return new QueryResponse(fields, rows, jobId, totalRows);
- }
- return new QueryResponse(jobId);
- }
- }
- public static async Task<string> GetJobStatusAsync(string projectId, string jobId, string accessToken)
- {
- string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/jobs/{1}", projectId, jobId);
- var request = (HttpWebRequest) WebRequest.Create(uri);
- request.Method = "GET";
- request.Headers.Add("Authorization", "Bearer " + accessToken);
- request.ContentType = "application/json";
- using (WebResponse response = await request.GetResponseAsync())
- {
- string responseJson = new StreamReader(response.GetResponseStream()).ReadToEnd();
- var body = JsonConvert.DeserializeObject<JObject>(responseJson);
- var status = body.Value<JObject>("status");
- var state = status.Value<string>("state");
- if (status.Value<JObject>("errorResult") != null)
- {
- var errorMessagge = status.Value<JObject>("errorResult").Value<string>("message");
- throw new Exception(errorMessagge);
- }
- return state;
- }
- }
- public static DateTime ParseTicks(long microseconds)
- {
- long value = microseconds;
- var ts = TimeSpan.FromMilliseconds(value);
- return new DateTime(1970, 1, 1, 0, 0, 0, 0, DateTimeKind.Utc).Add(ts).ToUniversalTime();
- }
- public static async Task<Job> GetJobAsync(string projectId, string jobId, string accessToken)
- {
- string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/jobs/{1}", projectId, jobId);
- var request = (HttpWebRequest)WebRequest.Create(uri);
- request.Method = "GET";
- request.Headers.Add("Authorization", "Bearer " + accessToken);
- request.ContentType = "application/json";
- using (WebResponse response = await request.GetResponseAsync())
- {
- string responseJson = new StreamReader(response.GetResponseStream()).ReadToEnd();
- var body = JsonConvert.DeserializeObject<JObject>(responseJson);
- var id = body.Value<string>("id").Split(':')[1];
- var state = body.Value<JObject>("status").Value<string>("state");
- var statistics = body.Value<JObject>("statistics");
- var totalBytesProcessed = statistics.Value<long>("totalBytesProcessed");
- var startTime = ParseTicks(statistics.Value<long>("startTime"));
- var endTime = ParseTicks(statistics.Value<long>("endTime"));
- return new Job(id, state, totalBytesProcessed, startTime, endTime);
- }
- }
- public static async Task<string> StartLoadJobAsync(string projectId, string sourceUri, string datasetId, string tableId, string accessToken, Field[] fields, long maxBadRecords = 0)
- {
- var uris = new JProperty("sourceUris", new JArray(sourceUri));
- var destinationTable = new JProperty("destinationTable", new JObject(new JProperty("projectId", projectId), new JProperty("datasetId", datasetId), new JProperty("tableId", tableId)));
- var allowQuotedLines = new JProperty("sourceFormat", "NEWLINE_DELIMITED_JSON");
- var badRecords = new JProperty("maxBadRecords", maxBadRecords);
- var configuration = new JObject {new JProperty("load", new JObject(uris, new JProperty("schema", new JObject(GetFields(fields))), destinationTable, allowQuotedLines, badRecords))};
- var doc = new JObject {{"projectId", projectId}, {"configuration", configuration}};
- string uri = string.Format("https://www.googleapis.com/bigquery/v2/projects/{0}/jobs", projectId);
- var request = (HttpWebRequest) WebRequest.Create(uri);
- request.Method = "POST";
- request.Headers.Add("Authorization", "Bearer " + accessToken);
- request.ContentType = "application/json";
- string requestJson = doc.ToString();
- byte[] buffer = Encoding.UTF8.GetBytes(requestJson);
- request.ContentLength = buffer.Length;
- request.GetRequestStream().Write(buffer, 0, buffer.Length);
- string responseJson;
- try
- {
- using (WebResponse response = await request.GetResponseAsync())
- {
- responseJson = new StreamReader(response.GetResponseStream()).ReadToEnd();
- var ret = JsonConvert.DeserializeObject<JObject>(responseJson);
- return ret.Value<JObject>("jobReference").Value<string>("jobId");
- }
- }
- catch (WebException ex)
- {
- if (ex.Response == null)
- {
- throw;
- }
- var status = (int) ((HttpWebResponse) ex.Response).StatusCode;
- if (status == 400)
- {
- responseJson = new StreamReader(ex.Response.GetResponseStream()).ReadToEnd();
- var errorMessage = JsonConvert.DeserializeObject<JObject>(responseJson).Value<JObject>("error").Value<string>("message");
- throw new Exception(errorMessage);
- }
- throw;
- }
- }
- public static JProperty GetFields(Field[] fields)
- {
- var jArray = new JArray(fields.Select(GetFieldSchema).ToArray());
- return new JProperty("fields", jArray);
- }
- private static JObject GetFieldSchema(Field f)
- {
- var ret = new JObject {{"name", f.Name}, {"type", f.Type}, {"mode", f.Mode}};
- if (f.Fields != null && f.Fields.Length > 0)
- {
- JProperty nestedFields = GetFields(f.Fields);
- ret.Add(nestedFields);
- }
- return ret;
- }
- }
- }