PageRenderTime 55ms CodeModel.GetById 30ms RepoModel.GetById 1ms app.codeStats 0ms

/unigrabber.pl

https://bitbucket.org/naegele/unigrabber
Perl | 115 lines | 83 code | 12 blank | 20 comment | 6 complexity | 35f7e4f02aa0d1395e064cde72bb8cb3 MD5 | raw file
  1. #!/usr/bin/perl
  2. # This script is free software by the terms of the GPLv3
  3. # Contact:
  4. # Daniel Nägele - daniel.n.wb@gmail.com
  5. # Thanks to okbr and FaKeller (bitbucket usernames) for the help with the stylesheet
  6. # Follow the comments here, also make sure the directory
  7. # /mirror/password (whatever you choose) is created before running the script.
  8. # Besides a running perl interpreter, this script needs the perl module
  9. # LWP::Simple and the Linux utility tar (exchangeable).
  10. use LWP::Simple;
  # Configuration (package globals read by the main loop):
  #   $url_file  - text file that is read line by line on every run
  #   $html_file - HTML page that is rewritten from scratch on every run
  our $url_file  = './urls.txt';
  our $html_file = './index.html';
  # prepnext([$no_skip])
  #
  # Advances to the next numbered file of the current collection.
  #
  # Works entirely on package globals:
  #   reads   @url, $i      - $url[$i] is the URL template containing <NUM>
  #   updates $nr           - incremented by one (by two if a number is skipped)
  #   sets    $full_url     - template with <NUM> replaced by $nr
  #   sets    $filename     - last path segment of $full_url
  #
  # Zeros written directly in front of <NUM> are treated as zero padding:
  # "0<NUM>" yields 01 .. 09, 10, ... and "00<NUM>" yields 001 .. 099, 100.
  #
  # If the resulting URL does not answer a HEAD request, exactly one number
  # is skipped by calling prepnext(1).  Passing any defined argument disables
  # this skip, so the recursion is at most one level deep.
  sub prepnext
  {
      my ($no_skip) = @_;
      our ($nr, $i, @url, $full_url, $filename);

      $nr++;
      $full_url = $url[$i];

      # Pad to (number of leading zeros + 1) digits; sprintf never truncates,
      # so numbers wider than the padding are written out in full.
      $full_url =~ s/(0*)<NUM>/sprintf('%0*d', length($1) + 1, $nr)/ge;

      $filename = (split m{/}, $full_url)[-1];

      # Checks if there has been a number left out.  The skip flag is tested
      # first so the recursive call does not issue a HEAD request whose
      # result would be ignored anyway.
      if (not defined $no_skip and not head($full_url))
      {
          prepnext(1);
      }
      return;
  }
  # Main loop: rebuild the mirror and the HTML index, then sleep for eight
  # hours and start over (a cronjob is the recommended alternative).
  #
  # The url file is read as pairs of lines: the collection name (also used
  # as the directory name below mirror/) followed by the URL template that
  # prepnext() expands.  $i, $nr, @url, $full_url and $filename stay package
  # globals because prepnext() reads and writes them.
  our ($url_file, $html_file, @url, $i, $nr, $full_url, $filename);

  while (1)
  {
      # read the url file by line
      open(my $url_fh, '<', $url_file)
          or die "Cannot read $url_file: $!\n";
      chomp(@url = <$url_fh>);
      close($url_fh);

      open(my $html_fh, '>', $html_file)
          or die "Cannot write $html_file: $!\n";

      # insert custom html title/stylesheet below
      print {$html_fh} "
<html>\n
<head>\n
<title>PDF-Links Informatik SS12</title>\n
<link rel='stylesheet' type='text/css' href='style.css'/>\n
</head>\n
<body>\n
<div id='content'>\n";

      # loop over each url; $i must remain the global index (odd lines hold
      # the templates), so a C-style loop is used on purpose
      for ($i = 1; $i <= $#url; $i += 2)
      {
          my $title = $url[$i - 1];

          # this code assembles the path of the copies on your server
          my $path = "mirror/" . $title;
          unless (-d $path)
          {
              mkdir $path or warn "Cannot create $path: $!\n";
          }

          # header for a collection of files
          print {$html_fh} "<h3>" . $title . "</h3>\n";

          $nr = 0;
          prepnext();

          # loop over the files at the url
          while (head($full_url)) # checks availability
          {
              my $local_file = $path . "/" . $filename;
              my $is_tex     = $filename =~ /tex$/;

              # TeX sources are refreshed on every run; everything else is
              # downloaded only if the mirrored copy does not exist yet.
              if ($is_tex or not -e $local_file)
              {
                  getstore($full_url, $local_file);
              }

              # prints the links to the output file, especially 'Blatt'
              if ($is_tex)
              {
                  # only the extension is swapped, not a "tex" elsewhere
                  # in the file name
                  (my $pdf_name = $filename) =~ s/tex$/pdf/;

                  print {$html_fh} "<p><a href='" . $full_url . "'>TexFile</a>\n";
                  print {$html_fh} "<a class='alt' id='alt' href='./"
                      . $local_file . "'>(mirror)</a></p>\n";
                  print {$html_fh} "<p><a href='./" . $path . "/"
                      . $pdf_name . "'>PDF File (aktuell)</a></p>\n";
              }
              else
              {
                  print {$html_fh} "<p><a href='" . $full_url . "'>Blatt " . $nr . "</a>\n";
                  print {$html_fh} "<a class='alt' id='alt' href='./"
                      . $local_file . "'>(mirror)</a></p>\n";
              }

              # A template without <NUM> always expands to the same URL, so
              # it names a single file; stop instead of looping forever.
              last unless $url[$i] =~ /<NUM>/;

              prepnext();
          }
      }

      # again, custom html, styles and the footer
      print {$html_fh} "
<div style='clear: both;'></div>\n
</div>\n
<div id='footer'>\n
<div class='grid_half lft'>\n
<a href='./mirror/mirror.tar'>Archiv</a><br>\n
<a href='https://bitbucket.org/naegele/unigrabber'>Source</a>
</div>\n
<div class='grid_half rgt'>\n
Erstellt: " . scalar(localtime()) . "</div>\n</div>\n </body>\n</html>";
      close($html_fh) or warn "Cannot close $html_file: $!\n";

      # external commands, providing the archive; list form avoids the shell
      system('tar', '-cf', 'mirror/mirror.tar', './mirror') == 0
          or warn "tar failed (status $?)\n";
      # presumably builds the PDF that "PDF File (aktuell)" links to
      system('rubber', '-d', '--into', 'mirror/DSA TexFile/',
          'mirror/DSA TexFile/scribe_notes.tex') == 0
          or warn "rubber failed (status $?)\n";

      print "Updated. \n";

      # ugly method below, cronjob recommended
      sleep(28800); # = 8h

      # drop the TeX source before the next run
      unlink 'mirror/DSA TexFile/scribe_notes.tex';
  }