/sandbox/ua/runtest/plugins/env.robot.js

https://gitlab.com/bruce.ng/alloy-ui · JavaScript · 95 lines · 64 code · 13 blank · 18 comment · 4 complexity · 879e80e1e86fcb055697451d89f4913f MD5 · raw file

  1. /**
  2. * @author thatcher
  3. */
  4. load('dist/env.rhino.js');
  5. load('plugins/jquery.js');
  /**
   * Scrapes the currently loaded document and posts the extracted record
   * to the search index, collecting further links to crawl along the way.
   *
   * @param {String} url   address used both as the record's `url` field and,
   *                       URI-encoded, as its `$id` / REST path segment
   * @param {Array}  links list of hrefs already queued for crawling; new
   *                       local hrefs found in the document are appended
   *                       to it in place
   */
  function scrape(url, links){
    // attr() yields undefined when the page has no keywords <meta>;
    // calling split() on that would throw and abort the whole scrape,
    // so a missing tag is indexed as an empty keyword list instead
    var keywordContent = $('meta[name=keywords]').attr('content');
    // scrape text from the current document which we will
    // assign weights to in our search index
    var data = {
      $id: encodeURIComponent(url),
      url: url,
      full_text: $(document.body).text(),
      title: document.title,
      headings: $('h1, h2, h3, h4, h5, h6').text(),
      description: $('meta[name=description]').attr('content'),
      keywords: typeof keywordContent === 'string' ? keywordContent.split(',') : []
    };
    // find all the relevant links, but don't include any we
    // already have in our link array
    $('a[href]').each(function(){
      var href = $(this).attr('href');
      // the pattern rejects hrefs that start with (optional whitespace and)
      // "http", as well as any href containing "#"
      if($.inArray(href, links) === -1 && !href.match(/^(\s)*http|#/)){
        // we only want to crawl local links
        links.push(href);
      }
    });
    // save the record to our index; the request is synchronous
    // (async: false) so it completes before scrape returns
    $.ajax({
      url:'http://localhost:8080/rest/index/'+data.$id,
      contentType:'application/json',
      dataType:'json',
      type: 'post',
      async: false,
      data: JSON.stringify(data),
      processData: false,
      success: function(){
        console.log('indexed document %s', url);
      }
    });
  }
  // Once the document is ready: rebuild the search index from scratch,
  // index the current document, then crawl every local link discovered.
  $(function(){
    // Issues one synchronous, body-less request against the index root
    // and logs the given message when it succeeds.
    function sendIndexRequest(verb, doneMessage){
      $.ajax({
        url:'http://localhost:8080/rest/index/',
        contentType:'application/json',
        dataType:'json',
        type: verb,
        async: false,
        success: function(){
          console.log(doneMessage);
        }
      });
    }

    // delete the index to start fresh
    sendIndexRequest('delete', 'deleted search index');
    // create the search index we will populate with our simple crawl
    sendIndexRequest('put', 'created search index');

    // queue of relevant links to crawl; scrape() appends to it
    var links = [];
    // index this document first
    scrape(document.location.toString(), links);

    // now walk the queue; it may keep growing while we iterate,
    // so the length is re-read on every pass
    var cursor = 0;
    while(cursor < links.length){
      var target = links[cursor];
      cursor += 1;
      try{
        // replaces this document with the document from the link
        document.location = Envjs.uri(target);
        scrape(target, links);
      }catch(e){
        console.log('failed to load %s \n %s', target, e);
      }
    }
  });
  // load the start page for the crawl
  window.location = 'http://localhost:8080/';