/node/etl/src/main/java/com/alibaba/otter/node/etl/common/pipe/impl/http/RowDataHttpPipe.java

https://github.com/itisaid/otter
Java | 344 lines | 269 code | 27 blank | 48 comment | 33 complexity | 469c0c2c7981e53b340e042b2a45da32 MD5 | raw file
  1. /*
  2. * Copyright (C) 2010-2101 Alibaba Group Holding Limited.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package com.alibaba.otter.node.etl.common.pipe.impl.http;
  17. import java.io.BufferedInputStream;
  18. import java.io.BufferedOutputStream;
  19. import java.io.File;
  20. import java.io.FileInputStream;
  21. import java.io.FileOutputStream;
  22. import java.io.IOException;
  23. import java.io.InputStream;
  24. import java.io.OutputStream;
  25. import java.text.MessageFormat;
  26. import java.text.SimpleDateFormat;
  27. import java.util.ArrayList;
  28. import java.util.Date;
  29. import java.util.List;
  30. import org.apache.commons.io.IOUtils;
  31. import org.apache.commons.lang.ClassUtils;
  32. import org.apache.commons.lang.StringUtils;
  33. import org.springframework.util.CollectionUtils;
  34. import com.alibaba.fastjson.JSONReader;
  35. import com.alibaba.otter.node.etl.common.io.EncryptedData;
  36. import com.alibaba.otter.node.etl.common.io.download.DataRetriever;
  37. import com.alibaba.otter.node.etl.common.pipe.PipeDataType;
  38. import com.alibaba.otter.node.etl.common.pipe.exception.PipeException;
  39. import com.alibaba.otter.node.etl.model.protobuf.BatchProto;
  40. import com.alibaba.otter.shared.common.model.config.channel.ChannelParameter.SyncConsistency;
  41. import com.alibaba.otter.shared.common.model.config.channel.ChannelParameter.SyncMode;
  42. import com.alibaba.otter.shared.common.model.config.pipeline.Pipeline;
  43. import com.alibaba.otter.shared.common.utils.ByteUtils;
  44. import com.alibaba.otter.shared.etl.model.DbBatch;
  45. import com.alibaba.otter.shared.etl.model.EventColumn;
  46. import com.alibaba.otter.shared.etl.model.EventData;
  47. import com.alibaba.otter.shared.etl.model.EventType;
  48. import com.alibaba.otter.shared.etl.model.FileBatch;
  49. import com.alibaba.otter.shared.etl.model.FileData;
  50. import com.alibaba.otter.shared.etl.model.Identity;
  51. import com.alibaba.otter.shared.etl.model.RowBatch;
  52. /**
  53. * 基于http下载的pipe实现
  54. *
  55. * @author jianghang 2011-10-13 下午06:31:13
  56. * @version 4.0.0
  57. */
  58. public class RowDataHttpPipe extends AbstractHttpPipe<DbBatch, HttpPipeKey> {
  59. public HttpPipeKey put(final DbBatch data) throws PipeException {
  60. return saveDbBatch(data);
  61. }
  62. public DbBatch get(final HttpPipeKey key) throws PipeException {
  63. // 处理dbBatch数据
  64. return getDbBatch(key);
  65. }
  66. // ======================== help method ===================
  67. // 保存对应的dbBatch
  68. private HttpPipeKey saveDbBatch(DbBatch dbBatch) {
  69. RowBatch rowBatch = dbBatch.getRowBatch();
  70. // 转化为proto对象
  71. BatchProto.RowBatch.Builder rowBatchBuilder = BatchProto.RowBatch.newBuilder();
  72. rowBatchBuilder.setIdentity(build(rowBatch.getIdentity()));
  73. // 处理具体的字段rowData
  74. for (EventData eventData : rowBatch.getDatas()) {
  75. BatchProto.RowData.Builder rowDataBuilder = BatchProto.RowData.newBuilder();
  76. rowDataBuilder.setPairId(eventData.getPairId());
  77. rowDataBuilder.setTableId(eventData.getTableId());
  78. if (eventData.getSchemaName() != null) {
  79. rowDataBuilder.setSchemaName(eventData.getSchemaName());
  80. }
  81. rowDataBuilder.setTableName(eventData.getTableName());
  82. rowDataBuilder.setEventType(eventData.getEventType().getValue());
  83. rowDataBuilder.setExecuteTime(eventData.getExecuteTime());
  84. // add by ljh at 2012-10-31
  85. if (eventData.getSyncMode() != null) {
  86. rowDataBuilder.setSyncMode(eventData.getSyncMode().getValue());
  87. }
  88. if (eventData.getSyncConsistency() != null) {
  89. rowDataBuilder.setSyncConsistency(eventData.getSyncConsistency().getValue());
  90. }
  91. // 构造key column
  92. for (EventColumn keyColumn : eventData.getKeys()) {
  93. rowDataBuilder.addKeys(buildColumn(keyColumn));
  94. }
  95. // 构造old key column
  96. if (CollectionUtils.isEmpty(eventData.getOldKeys()) == false) {
  97. for (EventColumn keyColumn : eventData.getOldKeys()) {
  98. rowDataBuilder.addOldKeys(buildColumn(keyColumn));
  99. }
  100. }
  101. // 构造其他 column
  102. for (EventColumn column : eventData.getColumns()) {
  103. rowDataBuilder.addColumns(buildColumn(column));
  104. }
  105. rowDataBuilder.setRemedy(eventData.isRemedy());
  106. rowDataBuilder.setSize(eventData.getSize());
  107. if (StringUtils.isNotEmpty(eventData.getSql())) {
  108. rowDataBuilder.setSql(eventData.getSql());
  109. }
  110. if (StringUtils.isNotEmpty(eventData.getDdlSchemaName())) {
  111. rowDataBuilder.setDdlSchemaName(eventData.getDdlSchemaName());
  112. }
  113. rowBatchBuilder.addRows(rowDataBuilder.build());// 添加一条rowData记录
  114. }
  115. // 处理下FileBatch
  116. FileBatch fileBatch = dbBatch.getFileBatch();
  117. BatchProto.FileBatch.Builder fileBatchBuilder = null;
  118. fileBatchBuilder = BatchProto.FileBatch.newBuilder();
  119. fileBatchBuilder.setIdentity(build(fileBatch.getIdentity()));
  120. // 构造对应的proto对象
  121. for (FileData fileData : fileBatch.getFiles()) {
  122. BatchProto.FileData.Builder fileDataBuilder = BatchProto.FileData.newBuilder();
  123. fileDataBuilder.setPairId(fileData.getPairId());
  124. fileDataBuilder.setTableId(fileData.getTableId());
  125. if (fileData.getNameSpace() != null) {
  126. fileDataBuilder.setNamespace(fileData.getNameSpace());
  127. }
  128. if (fileData.getPath() != null) {
  129. fileDataBuilder.setPath(fileData.getPath());
  130. }
  131. fileDataBuilder.setEventType(fileData.getEventType().getValue());
  132. fileDataBuilder.setSize(fileData.getSize());
  133. fileDataBuilder.setLastModifiedTime(fileData.getLastModifiedTime());
  134. fileBatchBuilder.addFiles(fileDataBuilder.build());// 添加一条fileData记录
  135. }
  136. // 处理构造对应的文件url
  137. String filename = buildFileName(rowBatch.getIdentity(), ClassUtils.getShortClassName(dbBatch.getClass()));
  138. // 写入数据
  139. File file = new File(htdocsDir, filename);
  140. OutputStream output = null;
  141. try {
  142. output = new BufferedOutputStream(new FileOutputStream(file));
  143. com.alibaba.otter.node.etl.model.protobuf.BatchProto.RowBatch rowBatchProto = rowBatchBuilder.build();
  144. output.write(ByteUtils.int2bytes(rowBatchProto.getSerializedSize()));//输出大小
  145. rowBatchProto.writeTo(output);//输出row batch
  146. com.alibaba.otter.node.etl.model.protobuf.BatchProto.FileBatch fileBatchProto = fileBatchBuilder.build();
  147. output.write(ByteUtils.int2bytes(fileBatchProto.getSerializedSize()));//输出大小
  148. fileBatchProto.writeTo(output); //输出file batch
  149. output.flush();
  150. } catch (IOException e) {
  151. throw new PipeException("write_byte_error", e);
  152. } finally {
  153. IOUtils.closeQuietly(output);
  154. }
  155. HttpPipeKey key = new HttpPipeKey();
  156. key.setUrl(remoteUrlBuilder.getUrl(rowBatch.getIdentity().getPipelineId(), filename));
  157. key.setDataType(PipeDataType.DB_BATCH);
  158. key.setIdentity(rowBatch.getIdentity());
  159. Pipeline pipeline = configClientService.findPipeline(rowBatch.getIdentity().getPipelineId());
  160. if (pipeline.getParameters().getUseFileEncrypt()) {
  161. // 加密处理
  162. EncryptedData encryptedData = encryptFile(file);
  163. key.setKey(encryptedData.getKey());
  164. key.setCrc(encryptedData.getCrc());
  165. }
  166. return key;
  167. }
  168. // 处理对应的dbBatch
  169. private DbBatch getDbBatch(HttpPipeKey key) {
  170. String dataUrl = key.getUrl();
  171. Pipeline pipeline = configClientService.findPipeline(key.getIdentity().getPipelineId());
  172. DataRetriever dataRetriever = dataRetrieverFactory.createRetriever(pipeline.getParameters().getRetriever(),
  173. dataUrl, downloadDir);
  174. File archiveFile = null;
  175. try {
  176. dataRetriever.connect();
  177. dataRetriever.doRetrieve();
  178. archiveFile = dataRetriever.getDataAsFile();
  179. } catch (Exception e) {
  180. dataRetriever.abort();
  181. throw new PipeException("download_error", e);
  182. } finally {
  183. dataRetriever.disconnect();
  184. }
  185. // 处理下有加密的数据
  186. if (StringUtils.isNotEmpty(key.getKey()) && StringUtils.isNotEmpty(key.getCrc())) {
  187. decodeFile(archiveFile, key.getKey(), key.getCrc());
  188. }
  189. InputStream input = null;
  190. JSONReader reader = null;
  191. try {
  192. input = new BufferedInputStream(new FileInputStream(archiveFile));
  193. DbBatch dbBatch = new DbBatch();
  194. byte[] lengthBytes = new byte[4];
  195. input.read(lengthBytes);
  196. int length = ByteUtils.bytes2int(lengthBytes);
  197. BatchProto.RowBatch rowbatchProto = BatchProto.RowBatch.parseFrom(new LimitedInputStream(input, length));
  198. // 构造原始的model对象
  199. RowBatch rowBatch = new RowBatch();
  200. rowBatch.setIdentity(build(rowbatchProto.getIdentity()));
  201. for (BatchProto.RowData rowDataProto : rowbatchProto.getRowsList()) {
  202. EventData eventData = new EventData();
  203. eventData.setPairId(rowDataProto.getPairId());
  204. eventData.setTableId(rowDataProto.getTableId());
  205. eventData.setTableName(rowDataProto.getTableName());
  206. eventData.setSchemaName(rowDataProto.getSchemaName());
  207. eventData.setEventType(EventType.valuesOf(rowDataProto.getEventType()));
  208. eventData.setExecuteTime(rowDataProto.getExecuteTime());
  209. // add by ljh at 2012-10-31
  210. if (StringUtils.isNotEmpty(rowDataProto.getSyncMode())) {
  211. eventData.setSyncMode(SyncMode.valuesOf(rowDataProto.getSyncMode()));
  212. }
  213. if (StringUtils.isNotEmpty(rowDataProto.getSyncConsistency())) {
  214. eventData.setSyncConsistency(SyncConsistency.valuesOf(rowDataProto.getSyncConsistency()));
  215. }
  216. // 处理主键
  217. List<EventColumn> keys = new ArrayList<EventColumn>();
  218. for (BatchProto.Column columnProto : rowDataProto.getKeysList()) {
  219. keys.add(buildColumn(columnProto));
  220. }
  221. eventData.setKeys(keys);
  222. // 处理old主键
  223. if (CollectionUtils.isEmpty(rowDataProto.getOldKeysList()) == false) {
  224. List<EventColumn> oldKeys = new ArrayList<EventColumn>();
  225. for (BatchProto.Column columnProto : rowDataProto.getOldKeysList()) {
  226. oldKeys.add(buildColumn(columnProto));
  227. }
  228. eventData.setOldKeys(oldKeys);
  229. }
  230. // 处理具体的column value
  231. List<EventColumn> columns = new ArrayList<EventColumn>();
  232. for (BatchProto.Column columnProto : rowDataProto.getColumnsList()) {
  233. columns.add(buildColumn(columnProto));
  234. }
  235. eventData.setColumns(columns);
  236. eventData.setRemedy(rowDataProto.getRemedy());
  237. eventData.setSize(rowDataProto.getSize());
  238. eventData.setSql(rowDataProto.getSql());
  239. eventData.setDdlSchemaName(rowDataProto.getDdlSchemaName());
  240. // 添加到总记录
  241. rowBatch.merge(eventData);
  242. }
  243. dbBatch.setRowBatch(rowBatch);
  244. input.read(lengthBytes);
  245. length = ByteUtils.bytes2int(lengthBytes);
  246. BatchProto.FileBatch filebatchProto = BatchProto.FileBatch.parseFrom(new LimitedInputStream(input, length));
  247. // 构造原始的model对象
  248. FileBatch fileBatch = new FileBatch();
  249. fileBatch.setIdentity(build(filebatchProto.getIdentity()));
  250. for (BatchProto.FileData fileDataProto : filebatchProto.getFilesList()) {
  251. FileData fileData = new FileData();
  252. fileData.setPairId(fileDataProto.getPairId());
  253. fileData.setTableId(fileDataProto.getTableId());
  254. fileData.setEventType(EventType.valuesOf(fileDataProto.getEventType()));
  255. fileData.setLastModifiedTime(fileDataProto.getLastModifiedTime());
  256. fileData.setNameSpace(fileDataProto.getNamespace());
  257. fileData.setPath(fileDataProto.getPath());
  258. fileData.setSize(fileDataProto.getSize());
  259. // 添加到filebatch中
  260. fileBatch.getFiles().add(fileData);
  261. }
  262. dbBatch.setFileBatch(fileBatch);
  263. return dbBatch;
  264. } catch (IOException e) {
  265. throw new PipeException("deserial_error", e);
  266. } finally {
  267. IOUtils.closeQuietly(reader);
  268. }
  269. }
  270. private EventColumn buildColumn(BatchProto.Column columnProto) {
  271. EventColumn column = new EventColumn();
  272. column.setColumnName(columnProto.getName());
  273. column.setNull(columnProto.getIsNull());
  274. column.setColumnType(columnProto.getType());
  275. column.setColumnValue(columnProto.getValue());
  276. column.setKey(columnProto.getIsPrimaryKey());
  277. column.setIndex(columnProto.getIndex());
  278. column.setUpdate(columnProto.getIsUpdate());// add by ljh 2012-08-30,标记变更字段
  279. return column;
  280. }
  281. private BatchProto.Column buildColumn(EventColumn keyColumn) {
  282. BatchProto.Column.Builder columnBuilder = BatchProto.Column.newBuilder();
  283. columnBuilder.setName(keyColumn.getColumnName());
  284. columnBuilder.setType(keyColumn.getColumnType());
  285. columnBuilder.setIsNull(keyColumn.isNull());
  286. columnBuilder.setIsPrimaryKey(keyColumn.isKey());
  287. columnBuilder.setIndex(keyColumn.getIndex());
  288. if (keyColumn.getColumnValue() != null) {
  289. columnBuilder.setValue(keyColumn.getColumnValue());
  290. }
  291. columnBuilder.setIsUpdate(keyColumn.isUpdate());// add by ljh 2012-08-30,标记变更字段
  292. return columnBuilder.build();
  293. }
  294. // 构造文件名
  295. private String buildFileName(Identity identity, String prefix) {
  296. Date now = new Date();
  297. String time = new SimpleDateFormat(DATE_FORMAT).format(now);
  298. return MessageFormat.format("{0}-{1}-{2}-{3}-{4}.gzip", prefix, time, String.valueOf(identity.getChannelId()),
  299. String.valueOf(identity.getPipelineId()), String.valueOf(identity.getProcessId()));
  300. }
  301. // 构造proto对象
  302. private BatchProto.Identity build(Identity identity) {
  303. BatchProto.Identity.Builder identityBuilder = BatchProto.Identity.newBuilder();
  304. identityBuilder.setChannelId(identity.getChannelId());
  305. identityBuilder.setPipelineId(identity.getPipelineId());
  306. identityBuilder.setProcessId(identity.getProcessId());
  307. return identityBuilder.build();
  308. }
  309. // 从proto对象构造回object
  310. private Identity build(BatchProto.Identity identityProto) {
  311. Identity identity = new Identity();
  312. identity.setChannelId(identityProto.getChannelId());
  313. identity.setPipelineId(identityProto.getPipelineId());
  314. identity.setProcessId(identityProto.getProcessId());
  315. return identity;
  316. }
  317. }