PageRenderTime 68ms CodeModel.GetById 14ms app.highlight 49ms RepoModel.GetById 1ms app.codeStats 1ms

/tags/release-0.1-rc2/hive/external/docs/changes/changes2html.pl

Relevant Search: With Applications for Solr and Elasticsearch

For more in-depth reading about search, ranking, and generally everything you could ever want to know about how Lucene, Elasticsearch, or Solr work under the hood, I highly suggest this book. It is easily one of the most interesting technical books I have read in a long time. If you are tasked with solving search relevance problems, even if not in Solr or Elasticsearch, it should be your first reference. Amazon Affiliate Link
#
Perl | 282 lines | 207 code | 30 blank | 45 comment | 39 complexity | 182ed80decf796aaabd021171a0c60f4 MD5 | raw file
  1#!/usr/bin/perl
  2#
  3# Transforms Lucene Java's CHANGES.txt into Changes.html
  4#
  5# Input is on STDIN, output is to STDOUT
  6#
  7#
  8# Licensed to the Apache Software Foundation (ASF) under one or more
  9# contributor license agreements.  See the NOTICE file distributed with
 10# this work for additional information regarding copyright ownership.
 11# The ASF licenses this file to You under the Apache License, Version 2.0
 12# (the "License"); you may not use this file except in compliance with
 13# the License.  You may obtain a copy of the License at
 14#
 15#     http://www.apache.org/licenses/LICENSE-2.0
 16#
 17# Unless required by applicable law or agreed to in writing, software
 18# distributed under the License is distributed on an "AS IS" BASIS,
 19# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 20# See the License for the specific language governing permissions and
 21# limitations under the License.
 22#
 23
 24use strict;
 25use warnings;
 26
 27my $jira_url_prefix = 'http://issues.apache.org/jira/browse/';
 28my $title = undef;
 29my $release = undef;
 30my $sections = undef;
 31my $items = undef;
 32my $first_relid = undef;
 33my $second_relid = undef;
 34my @releases = ();
 35
 36my @lines = <>;                        # Get all input at once
 37
 38#
 39# Parse input and build hierarchical release structure in @releases
 40#
 41for (my $line_num = 0 ; $line_num <= $#lines ; ++$line_num) {
 42  $_ = $lines[$line_num];
 43  next unless (/\S/);                  # Skip blank lines
 44
 45  unless ($title) {
 46    if (/\S/) {
 47      s/^\s+//;                        # Trim leading whitespace
 48      s/\s+$//;                        # Trim trailing whitespace
 49    }
 50    $title = $_;
 51    next;
 52  }
 53
 54  if (/^(Release)|(Trunk)/) {   # Release headings
 55    $release = $_;
 56    $sections = [];
 57    push @releases, [ $release, $sections ];
 58    ($first_relid = lc($release)) =~ s/\s+/_/g   if ($#releases == 0);
 59    ($second_relid = lc($release)) =~ s/\s+/_/g  if ($#releases == 1);
 60    $items = undef;
 61    next;
 62  }
 63
 64  # Section heading: 2 leading spaces, words all capitalized
 65  if (/^  ([A-Z]+)\s*/) {
 66    my $heading = $_;
 67    $items = [];
 68    push @$sections, [ $heading, $items ];
 69    next;
 70  }
 71
 72  # Handle earlier releases without sections - create a headless section
 73  unless ($items) {
 74    $items = [];
 75    push @$sections, [ undef, $items ];
 76  }
 77
 78  my $type;
 79  if (@$items) { # A list item has been encountered in this section before
 80    $type = $items->[0];  # 0th position of items array is list type
 81  } else {
 82    $type = get_list_type($_);
 83    push @$items, $type;
 84  }
 85
 86  if ($type eq 'numbered') { # The modern items list style
 87    # List item boundary is another numbered item or an unindented line
 88    my $line;
 89    my $item = $_;
 90    $item =~ s/^(\s{0,2}\d+\.\s*)//;       # Trim the leading item number
 91    my $leading_ws_width = length($1);
 92    $item =~ s/\s+$//;                     # Trim trailing whitespace
 93    $item .= "\n";
 94
 95    while ($line_num < $#lines
 96           and ($line = $lines[++$line_num]) !~ /^(?:\s{0,2}\d+\.\s*\S|\S)/) {
 97      $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
 98      $line =~ s/\s+$//;                   # Trim trailing whitespace
 99      $item .= "$line\n";
100    }
101    $item =~ s/\n+\Z/\n/;                  # Trim trailing blank lines
102    push @$items, $item;
103    --$line_num unless ($line_num == $#lines);
104  } elsif ($type eq 'paragraph') {         # List item boundary is a blank line
105    my $line;
106    my $item = $_;
107    $item =~ s/^(\s+)//;
108    my $leading_ws_width = defined($1) ? length($1) : 0;
109    $item =~ s/\s+$//;                     # Trim trailing whitespace
110    $item .= "\n";
111
112    while ($line_num < $#lines and ($line = $lines[++$line_num]) =~ /\S/) {
113      $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
114      $line =~ s/\s+$//;                   # Trim trailing whitespace
115      $item .= "$line\n";
116    }
117    push @$items, $item;
118    --$line_num unless ($line_num == $#lines);
119  } else { # $type is one of the bulleted types
120    # List item boundary is another bullet or a blank line
121    my $line;
122    my $item = $_;
123    $item =~ s/^(\s*$type\s*)//;           # Trim the leading bullet
124    my $leading_ws_width = length($1);
125    $item =~ s/\s+$//;                     # Trim trailing whitespace
126    $item .= "\n";
127
128    while ($line_num < $#lines
129           and ($line = $lines[++$line_num]) !~ /^\s*(?:$type|\Z)/) {
130      $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
131      $line =~ s/\s+$//;                   # Trim trailing whitespace
132      $item .= "$line\n";
133    }
134    push @$items, $item;
135    --$line_num unless ($line_num == $#lines);
136  }
137}
138
#
# Print HTML-ified version to STDOUT
#
# The header interpolates $title (page title and top-level heading) and the
# ids of the first two releases, which the generated collapse() script leaves
# expanded while hiding every other <ul> and every <ol> on page load.
#
# Any of these may still be undefined (empty input, or fewer than two
# releases); default them to '' so the interpolation below does not emit
# "uninitialized value" warnings.  The generated text is the same either way.
#
for my $maybe_unset ($title, $first_relid, $second_relid) {
  $maybe_unset = '' unless defined $maybe_unset;
}

print<<"__HTML_HEADER__";
<!--
**********************************************************
** WARNING: This file is generated from CHANGES.txt by the 
**          Perl script 'changes2html.pl'.
**          Do *not* edit this file!
**********************************************************
          
****************************************************************************
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
****************************************************************************
-->
<html>
<head>
  <title>$title</title>
  <link rel="stylesheet" href="ChangesFancyStyle.css" title="Fancy">
  <link rel="alternate stylesheet" href="ChangesSimpleStyle.css" title="Simple">
  <META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
  <SCRIPT>
    function toggleList(e) {
      element = document.getElementById(e).style;
      element.display == 'none' ? element.display = 'block' : element.display='none';
    }
    function collapse() {
      for (var i = 0; i < document.getElementsByTagName("ul").length; i++) {
        var list = document.getElementsByTagName("ul")[i];
        if (list.id != '$first_relid' && list.id != '$second_relid') {
          list.style.display = "none";
        }
      }
      for (var i = 0; i < document.getElementsByTagName("ol").length; i++) {
        document.getElementsByTagName("ol")[i].style.display = "none"; 
      }
    }
    window.onload = collapse;
  </SCRIPT>
</head>
<body>

<a href="http://hadoop.apache.org/hive/"><img class="logoImage" alt="Hive" src="images/hive-logo.jpg" title="SQL and Data Warehousing Platform on Hadoop"></a>
<h1>$title</h1>

__HTML_HEADER__
198
#
# Emit one collapsible block per release.  The first two releases get <h2>
# headings; from the third release on, everything is nested inside an
# "Older Releases" list and uses <h3> headings instead.
#
my $heading;
my $relcnt = 0;
my $header = 'h2';
for my $rel (@releases) {
  if (++$relcnt == 3) {
    $header = 'h3';
    print "<h2><a href=\"javascript:toggleList('older')\">";
    print "Older Releases";
    print "</a></h2>\n";
    print "<ul id=\"older\">\n";
  }

  ($release, $sections) = @$rel;

  # The first section heading is undefined for the older sectionless releases.
  # Check for an empty section list first so that a release with no content
  # does not get a phantom section autovivified by the ->[0][0] lookup.
  my $has_release_sections = @$sections ? $sections->[0][0] : undef;

  (my $relid = lc($release)) =~ s/\s+/_/g;
  print "<$header><a href=\"javascript:toggleList('$relid')\">";
  print "$release";
  print "</a></$header>\n";
  print "<ul id=\"$relid\">\n"
    if ($has_release_sections);

  for my $section (@$sections) {
    ($heading, $items) = @$section;

    # Headless sections carry an undefined heading; treat it as empty
    # rather than passing undef to lc().
    my $sectid = defined($heading) ? lc($heading) : '';
    $sectid =~ s/\s+/_/g;

    # Slot 0 of @$items holds the list type, so $#{$items} is the item count
    my $numItemsStr = $#{$items} > 0 ? "($#{$items})" : "(none)";

    print "  <li><a href=\"javascript:toggleList('$relid.$sectid')\">",
          ($heading || ''), "</a>&nbsp;&nbsp;&nbsp;$numItemsStr\n"
      if ($has_release_sections);

    my $list_type = $items->[0] || '';
    my $list = ($has_release_sections || $list_type eq 'numbered' ? 'ol' : 'ul');
    my $listid = $sectid ? "$relid.$sectid" : $relid;
    print "    <$list id=\"$listid\">\n";

    for my $itemnum (1..$#{$items}) {
      my $item = $items->[$itemnum];
      $item =~ s:&:&amp;:g;                            # Escape HTML metachars
      $item =~ s:<:&lt;:g;
      $item =~ s:>:&gt;:g;

      $item =~ s:\s*(\([^)"]+?\))\s*$:<br />$1:;       # Separate attribution
      $item =~ s:\n{2,}:\n<p/>\n:g;                    # Keep paragraph breaks

      # Link to JIRA.  The URL prefix is quoted so its '.' characters match
      # literally, and both HADOOP-n and HIVE-n issue keys are recognised.
      $item =~ s{(?:\Q$jira_url_prefix\E)?((?:HADOOP|HIVE)-\d+)}
                {<a href="${jira_url_prefix}$1">$1</a>}g;
      print "      <li>$item</li>\n";
    }
    print "    </$list>\n";
    print "  </li>\n" if ($has_release_sections);
  }
  print "</ul>\n" if ($has_release_sections);
}

# The "older" list is opened when the third release is reached, so it must be
# closed whenever there were at least three releases.
print "</ul>\n" if ($relcnt >= 3);
print "</body>\n</html>\n";
256
257
#
# Subroutine: get_list_type
#
# Classifies a list by looking at the first line of its first item.
#
# Takes one parameter:
#
#    - The first line of a sub-section/point
#
# Returns one scalar:
#
#    - 'numbered' when the line starts (after at most two whitespace
#      characters) with digits, a period, whitespace and then text;
#    - the bullet character itself ('-' or '.') when the line starts, after
#      optional whitespace, with that bullet followed by whitespace and text;
#    - 'paragraph' in every other case.
#
sub get_list_type {
  my ($line) = @_;

  return 'numbered' if $line =~ /^\s{0,2}\d+\.\s+\S+/;
  return $1         if $line =~ /^\s*([-.])\s+\S+/;
  return 'paragraph';
}
281
2821;