PageRenderTime 13ms CodeModel.GetById 2ms app.highlight 5ms RepoModel.GetById 1ms app.codeStats 0ms

/doc/html2wiki.sh

http://cmockery.googlecode.com/
Shell | 154 lines | 103 code | 1 blank | 50 comment | 0 complexity | c03225fa0b5093f4f8c211c9c6d5b65e MD5 | raw file
  1#!/bin/bash
  2#
  3# Translate really simple html to googlecode.com wiki.
  4#
  5# Usage: cat input.html | html2wiki.sh > outputwiki.txt
  6#
  7# Most of this script is simple sed substitutions with an awk script to handle
  8# hierarchical lists.
  9
 10# Awk program to escape all instances of * outside of <listing></listing>
 11awk '
 12BEGIN { in_listing = 0; }
 13/<[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>/ { in_listing = 1; }
 14/<\/[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>/ { in_listing = 0; }
 15/.*/ {
 16  if (in_listing) {
 17    print $0;
 18  } else {
 19    print gensub("*", "`*`", "g", $0)
 20  }
 21}' | \
 22# Awk program to convert hierachical unordered and ordered lists into
 23# googlecode wiki list markup.  This is limited to converting very simple
 24# html lists in the form:
 25#
 26# <ul>
 27#   <li>item 1</li>
 28#   ...
 29#   <li>item N</li>
 30# </ul>
 31#
 32# This script also removes leading spaces from all lines outside of <listing>
 33# sections.
 34awk '
 35BEGIN {
 36  list_type_none = 0;
 37  list_type_ordered = 1;
 38  list_type_unordered = 2;
 39  # Number of nested lists.
 40  list_depth = 0;
 41  # Number of items in the list.
 42  list_items[list_depth] = 0;
 43  # Type of list.
 44  list_type[list_depth] = list_type_none;
 45  # Do nott strip whitespace from listing sections.
 46  in_listing = 0;
 47}
 48
 49# Generate a string of indent spaces.
 50function list_indent(indent) {
 51  format = sprintf("%%%ds", indent);
 52  return sprintf(format, "");
 53}
 54
 55/<[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>/ { in_listing = 1; }
 56/<\/[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>/ { in_listing = 0; }
 57
 58# Process all lines non-blank lines.
 59/^.*$/ {
 60  # Remove leading white space.
 61  if (!in_listing) {
 62    output_string = gensub(/^ */, "", 1, $0);
 63  } else {
 64    output_string = $0;
 65  }
 66  search_string = output_string
 67
 68  # Replace list tags with googlecode wiki markup.
 69  while (match(search_string, /<[^>]*>/, matches)) {
 70    tag = matches[0];
 71    search_string = substr(search_string,
 72                           matches[0, "start"] + matches[0, "length"]);
 73    if (match(tag, /^<[Uu][Ll]>$/)) {
 74      list_depth++;
 75      list_type[list_depth] = list_type_unordered;
 76      list_items[list_depth] = 0;
 77      output_string = gensub(tag, "", 1, output_string);
 78    } else if (match(tag, /^[Oo][Ll]>$/)) {
 79      list_depth++;
 80      list_type[list_depth] = list_type_ordered;
 81      list_items[list_depth] = 0;
 82      output_string = gensub(tag, "", 1, output_string);
 83    } else if (match(tag, /^<\/[Ll][Ii]>$/)) {
 84      output_string = gensub(tag, "", 1, output_string);
 85    } else if (list_depth) {
 86      if (match(tag, /^<[Ll][Ii]>$/)) {
 87        if (list_type[list_depth] == list_type_unordered) {
 88          output_string = gensub(tag, list_indent(list_depth) "* ", 1,
 89                                 output_string);
 90        } else if (list_type[list_depth] == list_type_ordered) {
 91          output_string = gensub(tag, list_indent(list_depth) "# ", 1,
 92                                 output_string);
 93        }
 94      } else if (match(tag, /^<\/[Uu][Ll]>$/) ||
 95                 match(tag, /^<\/[Ou][Ll]>$/)) {
 96        output_string = gensub(tag, "", 1, output_string);
 97        list_depth --;
 98      }
 99    }
100  }
101  # If a list is being parsed then filter blank lines.
102  if (list_depth == 0 || length(output_string)) {
103    print output_string 
104  }
105}
106' | \
# This sed program translates really simple html into wiki suitable for
# googlecode.com.
#
# Supported tags:
# <p>
# <br> (also <br/> and <br />)
# <h1>
# <h2>
# <h3>
# <h4>
# <h5>
# <b>
# <i>
# <listing>
# <a href="#.*">.*</a>
# <a href=".*">.*</a>
# <a name=".*">.*</a>
#
# Any other tag is removed, keeping the text around it.
#
# Supported entities:
# &gt;
# &lt;
#
# Limitations:
# * Anchors must be on a single line and must contain one of either the name or
#   href attributes.  The link text must not contain tags that the earlier
#   rules leave unconverted; such an anchor is reduced to its plain text.
# * Href of local anchors (href="#.*") should be set to the name of a heading
#   within the document.  If the heading contains spaces the href should
#   contain underscores.
# * All external links are relative to
#   http://cmockery.googlecode.com/svn/trunk/doc/
# * The substitutions are applied inside <listing> sections as well.
#
# Extended regular expressions have no non-greedy ".*?", so every tag pattern
# is bounded with [^>]*, [^"]* or [^<]* to keep a match inside a single tag.
# A newline in a replacement is written as backslash-newline, which every sed
# understands.
sed -E '
s@<[Pp]>@\
@g
s@<[Bb][Rr] */?>@\
@g
s@</?[Hh]1>@=@g
s@</?[Hh]2>@==@g
s@</?[Hh]3>@===@g
s@</?[Hh]4>@====@g
s@</?[Hh]5>@=====@g
s@</?[Bb]>@*@g
s@</?[Ii]>@_@g
s@<[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>@{{{@g
s@</[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>@}}}@g
# Local links first, so the external-link rule never sees a "#" href.
s@<[Aa][[:space:]][^>]*href="#([^"]*)"[^>]*>([^<]*)</[Aa]>@[#\1 \2]@g
s@<[Aa][[:space:]][^>]*href="([^"]*)"[^>]*>([^<]*)</[Aa]>@[http://cmockery.googlecode.com/svn/trunk/doc/\1 \2]@g
s@<[Aa][[:space:]][^>]*name="[^"]*"[^>]*>@@g
s@</[Aa]>@@g
# Drop every remaining tag, then decode entities so that the decoded < and >
# are not mistaken for tags.
s@<[^>]*>@@g
s@&lt;@<@g
s@&gt;@>@g'