/unigrabber.pl
Perl | 115 lines | 83 code | 12 blank | 20 comment | 6 complexity | 35f7e4f02aa0d1395e064cde72bb8cb3 MD5 | raw file
#!/usr/bin/perl
# This script is free software under the terms of the GPLv3.
# Contact:
# Daniel Nägele - daniel.n.wb@gmail.com
# Thanks to okbr and FaKeller (bitbucket usernames) for their help with the stylesheet.
# Follow the comments here; also make sure the directory
# /mirror/password (whatever you choose) is created before running the script.
# Besides a working Perl interpreter, this script needs the Perl module
# LWP::Simple and the Linux utility tar (exchangeable).
use LWP::Simple;

# Text file listing what to fetch; the main loop reads it as pairs of lines
# (collection title, then URL template).
our $url_file = './urls.txt';

# Link page that is regenerated on every pass of the main loop.
our $html_file = './index.html';
# prepnext([$retried])
#
# Advances the global counter $nr and expands the URL template $url[$i] into
# the next download target.  Inside the template "<NUM>" is replaced by the
# counter; a template may write "0<NUM>" to zero-pad single-digit numbers.
#
# Reads globals:  @url, $i, $nr
# Writes globals: $nr, $full_url (expanded URL), $filename (text after the
#                 last slash of $full_url)
#
# When called without an argument and the expanded URL does not answer a HEAD
# request, exactly one number is skipped and the following one is prepared
# instead, so a single gap in the numbering does not end the series.  Passing
# any defined value marks the call as that retry and suppresses further ones.
sub prepnext
{
    my ($retried) = @_;

    $nr++;
    $full_url = $url[$i];

    # From 10 onwards the number has two digits itself: drop the padding zero.
    if ($nr > 9)
    {
        $full_url =~ s/0<NUM>/<NUM>/g;
    }
    $full_url =~ s/<NUM>/$nr/g;

    $filename = (split m{/}, $full_url)[-1];

    # Definedness is tested explicitly; a numeric comparison against undef
    # only worked because undef numifies to 0.  Checking the flag first also
    # spares the retry call a HEAD request whose result would be ignored.
    if (not defined $retried and not head($full_url))
    {
        prepnext(1);
    }
    return;
}
# Main loop: regenerates the link page and the local mirror, then waits eight
# hours before the next pass.
#
# The URL file is consumed as pairs of lines: a collection title (also used as
# the mirror sub-directory name) followed by a URL template for prepnext().
# @url, $i, $nr, $full_url and $filename stay package globals because
# prepnext() communicates through them.
while (1)
{
    # Re-read the list on every pass.
    open(my $url_fh, '<', $url_file)
        or die "Cannot read $url_file: $!\n";
    chomp(@url = <$url_fh>);    # read the url file by line
    close($url_fh);

    open(my $html, '>', $html_file)
        or die "Cannot write $html_file: $!\n";

    # insert custom html title/stylesheet below
    print {$html} "
<html>\n
<head>\n
<title>PDF-Links Informatik SS12</title>\n
<link rel='stylesheet' type='text/css' href='style.css'/>\n
</head>\n
<body>\n
<div id='content'>\n";

    # loop over each (title, url template) pair
    for ($i = 1; $i <= $#url; $i += 2)
    {
        # this code assembles the path of the copies on your server
        my $path = "mirror/" . $url[$i - 1];
        unless (-d $path)
        {
            mkdir $path
                or warn "Cannot create directory $path: $!\n";
        }

        # header for a collection of files
        print {$html} "<h3>" . $url[$i - 1] . "</h3>\n";

        $nr = 0;
        prepnext();

        # loop over the files at the url
        while (head($full_url))    # checks availability
        {
            my $local_file = $path . "/" . $filename;
            my $is_tex     = $filename =~ /tex$/;

            # TeX sources are fetched on every pass; everything else only if
            # it is not yet present in the mirror directory.  (The existence
            # test must look at the mirrored copy, not at the bare file name
            # in the current directory.)
            if ($is_tex or not -e $local_file)
            {
                my $status = getstore($full_url, $local_file);
                warn "Download of $full_url failed (HTTP status $status)\n"
                    if is_error($status);
            }

            # prints the links to the output file, especially 'Blatt'
            if ($is_tex)
            {
                print {$html} "<p><a href='" . $full_url . "'>TexFile</a>\n";
                print {$html} "<a class='alt' id='alt' href='./" . $path . "/" .
                    $filename . "'>(mirror)</a></p>\n";

                # Only the extension is rewritten, so a name that contains
                # "tex" earlier on still yields the right PDF link.
                (my $pdf_name = $filename) =~ s/tex$/pdf/;
                print {$html} "<p><a href='./" . $path . "/" .
                    $pdf_name . "'>PDF File (aktuell)</a></p>\n";
            }
            else
            {
                print {$html} "<p><a href='" . $full_url . "'>Blatt " . $nr . "</a>\n";
                print {$html} "<a class='alt' id='alt' href='./" . $path . "/" .
                    $filename . "'>(mirror)</a></p>\n";
            }

            prepnext();
        }
    }

    # again, custom html, styles and the footer
    print {$html} "
<div style='clear: both;'></div>\n
</div>\n
<div id='footer'>\n
<div class='grid_half lft'>\n
<a href='./mirror/mirror.tar'>Archiv</a><br>\n
<a href='https://bitbucket.org/naegele/unigrabber'>Source</a>
</div>\n
<div class='grid_half rgt'>\n
Erstellt: " . scalar localtime() . "</div>\n</div>\n </body>\n</html>";
    close($html)
        or warn "Cannot finish writing $html_file: $!\n";

    # external command, providing the archive
    system("tar -cf mirror/mirror.tar ./mirror") == 0
        or warn "tar failed (status $?)\n";

    # external command, building the PDF from the mirrored TeX source
    system('rubber -d --into mirror/"DSA TexFile"/ mirror/"DSA TexFile"/scribe_notes.tex') == 0
        or warn "rubber failed (status $?)\n";

    print "Updated. \n";

    # ugly method below, cronjob recommended
    sleep(28800);    # = 8h

    # Remove the mirrored TeX source before the next pass fetches it again.
    unlink('mirror/DSA TexFile/scribe_notes.tex');
}