/daijisen.js

https://github.com/shinout/kanabun-champ · JavaScript · 83 lines · 72 code · 11 blank · 0 comment · 10 complexity · 9fc56a5315f7940f3802bab23aa302fc MD5 · raw file

  1. var Junjo = require('./lib/Junjo/Junjo');
  2. var u2r = require('./lib/url2request');
  3. var http = require('http');
  // Common URL prefix of the dictionary pages requested by this script; callers
  // append a page number and a trailing slash (e.g. baseurl + "93/").
  4. var baseurl = "http://kotobank.jp/dictionary/daijisen/";
  /**
   * Queue one page collection per dictionary index page, then start the run.
   *
   * Page URLs are built as `baseurl + pageNumber + '/'` for page numbers
   * 1 through `lastPage` inclusive.
   *
   * @param {number} [lastPage=3555] Highest page number to request. When
   *   omitted, 3555 is used, which is the value that was previously hard-coded.
   */
  function main(lastPage) {
    var finalPage = (lastPage === undefined) ? 3555 : lastPage;
    var $j = new Junjo({timeout: 0});
    for (var page = 1; page <= finalPage; page++) {
      // collectFromPage is given a URL, so the value registered on $j is
      // whatever that call returns for a non-empty url argument.
      $j(collectFromPage(baseurl + page + '/'));
    }
    $j.run();
  }
  // Builds a Junjo flow with three named steps ('request', 'response',
  // 'collect') that fetches one page over HTTP, extracts candidate words from
  // its HTML, and prints them with console.log when the flow ends.
  //
  // url: optional. When truthy, the flow is run immediately with it and the
  //      result of $j.run(url) is returned; otherwise the un-run Junjo object
  //      is returned so the caller can call .run(url) itself.
  12. function collectFromPage(url) {
  13. var $j = new Junjo({timeout: 0});
  // presumably maps the first argument of run() to the name "url" — TODO confirm
  // against the Junjo documentation.
  14. $j.inputs({
  15. url : 0
  16. });
  // Step 'request': convert the URL to request options with u2r, send the
  // request with this.cb as the response callback, and route request errors
  // to this.fail.
  17. $j('request', function(url) {
  18. var options = u2r(url);
  19. var req = http.request(options, this.cb);
  20. req.end();
  21. req.on("error", this.fail.bind(this));
  22. });
  // Step 'response': hands the response object to this.absorbData.
  // NOTE(review): presumably this buffers the body so the next step receives
  // it as a string; the meaning of firstError('shift') and after() is defined
  // by Junjo and is not visible here — confirm.
  23. $j('response', function(res) {
  24. this.absorbData(res);
  25. })
  26. .firstError('shift')
  27. .after();
  // Step 'collect': split the HTML on '</a></li>' and, for each piece, keep
  // the text that follows its last '>'.
  28. $j('collect', function(html) {
  29. var ret = html.split('</a></li>').map(function(v) {
  30. var n = v.lastIndexOf('>');
  // NOTE(review): the regex patterns in the two replace() calls below are
  // empty in this copy, so `//g, '')` parses as a line comment and the
  // statement is not valid JavaScript. The original patterns must be restored
  // from the upstream file; they cannot be inferred from this text.
  31. var word = v.slice(n+1)
  32. .replace(//g, '')
  33. .replace(//g, '');
  // Cut the word at the first '【' and at the first '〔', when present. Both
  // indexes are taken before any slicing.
  34. var n2 = word.indexOf('【');
  35. var n3 = word.indexOf('〔');
  36. if (n2 >= 0) word = word.slice(0, n2);
  37. if (n3 >= 0) word = word.slice(0, n3);
  38. return word;
  39. })
  // Drop entries containing a lowercase ASCII letter; keep only entries whose
  // length is between 6 and 8 inclusive.
  40. .filter(function(v) {
  41. if (v.match(/[a-z]/)) return false;
  42. var len = v.length;
  43. return len >=6 && len <= 8;
  44. });
  // Discard three entries from the front and three from the back of the
  // filtered list (presumably non-word links around the listing — TODO confirm).
  45. for (var i=0; i<3; i++) {
  46. ret.shift();
  47. ret.pop();
  48. }
  49. return ret;
  50. })
  51. .out()
  52. .after();
  // Print every collected word, one per line.
  // NOTE(review): err is never inspected; if `out` is not an array when an
  // error occurs, out.forEach will throw — confirm what Junjo passes here.
  53. $j.on("end", function(err, out) {
  54. out.forEach(function(v) {
  55. console.log(v);
  56. });
  57. });
  58. return (url) ? $j.run(url): $j;
  59. }
  /**
   * Command exposed on `this`: builds an un-run page flow with
   * collectFromPage() and runs it against page 93 only.
   */
  this.collect = function() {
    var pageFlow = collectFromPage();
    pageFlow.run(baseurl + "93/");
  };
  // Command-line entry point: only acts when this file is the script that was
  // passed to node (process.argv[1] is this file's own path).
  if (process.argv[1] === __filename) {
    // Optional first CLI argument: name of a command function attached to `this`.
    var fname = process.argv[2];
    // Dispatch only to functions defined directly on `this`. A plain truthiness
    // check on this[fname] would also match inherited members such as
    // "toString" or "constructor", invoke them, and silently skip main().
    if (Object.prototype.hasOwnProperty.call(this, fname) &&
        typeof this[fname] === 'function') {
      this[fname]();
    }
    else {
      main();
    }
  }