PageRenderTime 28ms CodeModel.GetById 16ms app.highlight 11ms RepoModel.GetById 0ms app.codeStats 0ms

/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java

https://gitlab.com/taichu/webmagic
Java | 128 lines | 105 code | 17 blank | 6 comment | 7 complexity | be0cf94a32fdee8e55f232be6bb0b2e6 MD5 | raw file
  1package us.codecraft.webmagic.scheduler;
  2
  3import com.alibaba.fastjson.JSON;
  4import org.apache.commons.codec.digest.DigestUtils;
  5import redis.clients.jedis.Jedis;
  6import redis.clients.jedis.JedisPool;
  7import redis.clients.jedis.JedisPoolConfig;
  8import us.codecraft.webmagic.Request;
  9import us.codecraft.webmagic.Task;
 10import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
 11
 12/**
 13 * Use Redis as url scheduler for distributed crawlers.<br>
 14 *
 15 * @author code4crafter@gmail.com <br>
 16 * @since 0.2.0
 17 */
 18public class RedisScheduler extends DuplicateRemovedScheduler implements MonitorableScheduler, DuplicateRemover {
 19
 20    private JedisPool pool;
 21
 22    private static final String QUEUE_PREFIX = "queue_";
 23
 24    private static final String SET_PREFIX = "set_";
 25
 26    private static final String ITEM_PREFIX = "item_";
 27
 28    public RedisScheduler(String host) {
 29        this(new JedisPool(new JedisPoolConfig(), host));
 30    }
 31
 32    public RedisScheduler(JedisPool pool) {
 33        this.pool = pool;
 34        setDuplicateRemover(this);
 35    }
 36
 37    @Override
 38    public void resetDuplicateCheck(Task task) {
 39        Jedis jedis = pool.getResource();
 40        try {
 41            jedis.del(getSetKey(task));
 42        } finally {
 43            pool.returnResource(jedis);
 44        }
 45    }
 46
 47    @Override
 48    public boolean isDuplicate(Request request, Task task) {
 49        Jedis jedis = pool.getResource();
 50        try {
 51            boolean isDuplicate = jedis.sismember(getSetKey(task), request.getUrl());
 52            if (!isDuplicate) {
 53                jedis.sadd(getSetKey(task), request.getUrl());
 54            }
 55            return isDuplicate;
 56        } finally {
 57            pool.returnResource(jedis);
 58        }
 59
 60    }
 61
 62    @Override
 63    protected void pushWhenNoDuplicate(Request request, Task task) {
 64        Jedis jedis = pool.getResource();
 65        try {
 66            jedis.rpush(getQueueKey(task), request.getUrl());
 67            if (request.getExtras() != null) {
 68                String field = DigestUtils.shaHex(request.getUrl());
 69                String value = JSON.toJSONString(request);
 70                jedis.hset((ITEM_PREFIX + task.getUUID()), field, value);
 71            }
 72        } finally {
 73            pool.returnResource(jedis);
 74        }
 75    }
 76
 77    @Override
 78    public synchronized Request poll(Task task) {
 79        Jedis jedis = pool.getResource();
 80        try {
 81            String url = jedis.lpop(getQueueKey(task));
 82            if (url == null) {
 83                return null;
 84            }
 85            String key = ITEM_PREFIX + task.getUUID();
 86            String field = DigestUtils.shaHex(url);
 87            byte[] bytes = jedis.hget(key.getBytes(), field.getBytes());
 88            if (bytes != null) {
 89                Request o = JSON.parseObject(new String(bytes), Request.class);
 90                return o;
 91            }
 92            Request request = new Request(url);
 93            return request;
 94        } finally {
 95            pool.returnResource(jedis);
 96        }
 97    }
 98
 99    protected String getSetKey(Task task) {
100        return SET_PREFIX + task.getUUID();
101    }
102
103    protected String getQueueKey(Task task) {
104        return QUEUE_PREFIX + task.getUUID();
105    }
106
107    @Override
108    public int getLeftRequestsCount(Task task) {
109        Jedis jedis = pool.getResource();
110        try {
111            Long size = jedis.llen(getQueueKey(task));
112            return size.intValue();
113        } finally {
114            pool.returnResource(jedis);
115        }
116    }
117
118    @Override
119    public int getTotalRequestsCount(Task task) {
120        Jedis jedis = pool.getResource();
121        try {
122            Long size = jedis.scard(getSetKey(task));
123            return size.intValue();
124        } finally {
125            pool.returnResource(jedis);
126        }
127    }
128}