PageRenderTime 47ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePipeline.java

https://gitlab.com/taichu/webmagic
Java | 47 lines | 31 code | 7 blank | 9 comment | 0 complexity | 42079412d040ed37c73b8a606d3a3994 MD5 | raw file
  1. package us.codecraft.webmagic.pipeline;
  2. import com.alibaba.fastjson.JSON;
  3. import org.apache.commons.codec.digest.DigestUtils;
  4. import org.slf4j.Logger;
  5. import org.slf4j.LoggerFactory;
  6. import us.codecraft.webmagic.ResultItems;
  7. import us.codecraft.webmagic.Task;
  8. import us.codecraft.webmagic.utils.FilePersistentBase;
  9. import java.io.FileWriter;
  10. import java.io.IOException;
  11. import java.io.PrintWriter;
  12. /**
  13. * Store results to files in JSON format.<br>
  14. *
  15. * @author code4crafter@gmail.com <br>
  16. * @since 0.2.0
  17. */
  18. public class JsonFilePipeline extends FilePersistentBase implements Pipeline {
  19. private Logger logger = LoggerFactory.getLogger(getClass());
  20. /**
  21. * new JsonFilePageModelPipeline with default path "/data/webmagic/"
  22. */
  23. public JsonFilePipeline() {
  24. setPath("/data/webmagic");
  25. }
  26. public JsonFilePipeline(String path) {
  27. setPath(path);
  28. }
  29. @Override
  30. public void process(ResultItems resultItems, Task task) {
  31. String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
  32. try {
  33. PrintWriter printWriter = new PrintWriter(new FileWriter(getFile(path + DigestUtils.md5Hex(resultItems.getRequest().getUrl()) + ".json")));
  34. printWriter.write(JSON.toJSONString(resultItems.getAll()));
  35. printWriter.close();
  36. } catch (IOException e) {
  37. logger.warn("write file error", e);
  38. }
  39. }
  40. }