/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java
Java | 56 lines | 38 code | 7 blank | 11 comment | 2 complexity | ceccdef63cdefc8dff4c53ee8f7d7d0d MD5 | raw file
- package us.codecraft.webmagic.pipeline;
import com.alibaba.fastjson.JSON;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.model.HasKey;
import us.codecraft.webmagic.utils.FilePersistentBase;

import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
- /**
- * Store results objects (page models) to files in JSON format.<br>
- * Use model.getKey() as file name if the model implements HasKey.<br>
- * Otherwise use SHA1 as file name.
- *
- * @author code4crafter@gmail.com <br>
- * @since 0.2.0
- */
- public class JsonFilePageModelPipeline extends FilePersistentBase implements PageModelPipeline {
- private Logger logger = LoggerFactory.getLogger(getClass());
- /**
- * new JsonFilePageModelPipeline with default path "/data/webmagic/"
- */
- public JsonFilePageModelPipeline() {
- setPath("/data/webmagic/");
- }
- public JsonFilePageModelPipeline(String path) {
- setPath(path);
- }
- @Override
- public void process(Object o, Task task) {
- String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
- try {
- String filename;
- if (o instanceof HasKey) {
- filename = path + ((HasKey) o).key() + ".json";
- } else {
- filename = path + DigestUtils.md5Hex(ToStringBuilder.reflectionToString(o)) + ".json";
- }
- PrintWriter printWriter = new PrintWriter(new FileWriter(getFile(filename)));
- printWriter.write(JSON.toJSONString(o));
- printWriter.close();
- } catch (IOException e) {
- logger.warn("write file error", e);
- }
- }
- }