/App/UnxUtilsSrc/unxutils/gawk-3.0.4/test/funstack.awk
AWK | 977 lines | 678 code | 142 blank | 157 comment | 0 complexity | c220cce63839178ed27a18ef84698c29 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.0, LGPL-2.1, CC-BY-SA-3.0, Unlicense, AGPL-1.0
- ### ====================================================================
- ### @Awk-file{
- ### author = "Nelson H. F. Beebe",
- ### version = "1.00",
- ### date = "09 October 1996",
- ### time = "15:57:06 MDT",
- ### filename = "journal-toc.awk",
- ### address = "Center for Scientific Computing
- ### Department of Mathematics
- ### University of Utah
- ### Salt Lake City, UT 84112
- ### USA",
- ### telephone = "+1 801 581 5254",
- ### FAX = "+1 801 581 4148",
- ### URL = "http://www.math.utah.edu/~beebe",
- ### checksum = "25092 977 3357 26493",
- ### email = "beebe@math.utah.edu (Internet)",
- ### codetable = "ISO/ASCII",
- ### keywords = "BibTeX, bibliography, HTML, journal table of
- ### contents",
- ### supported = "yes",
- ### docstring = "Create a journal cover table of contents from
- ### <at>Article{...} entries in a journal BibTeX
- ### .bib file for checking the bibliography
- ### database against the actual journal covers.
- ### The output can be either plain text, or HTML.
- ###
- ### Usage:
- ### bibclean -max-width 0 BibTeX-file(s) | \
- ### bibsort -byvolume | \
- ### awk -f journal-toc.awk \
- ### [-v HTML=nnn] [-v INDENT=nnn] \
- ### [-v BIBFILEURL=url] >foo.toc
- ###
- ### or if the bibliography is already sorted
- ### by volume,
- ###
- ### bibclean -max-width 0 BibTeX-file(s) | \
- ### awk -f journal-toc.awk \
- ### [-v HTML=nnn] [-v INDENT=nnn] \
- ### [-v BIBFILEURL=url] >foo.toc
- ###
- ### A non-zero value of the command-line option,
- ### HTML=nnn, results in HTML output instead of
- ### the default plain ASCII text (corresponding
- ### to HTML=0).
- ###
- ### The INDENT=nnn command-line option specifies
- ### the number of blanks to indent each logical
- ### level of HTML. The default is INDENT=4.
- ### INDENT=0 suppresses indentation. The INDENT
- ### option has no effect when the default HTML=0
- ### (plain text output) option is in effect.
- ###
- ### When HTML output is selected, the
- ### BIBFILEURL=url command-line option provides a
- ### way to request hypertext links from table of
- ### contents page numbers to the complete BibTeX
- ### entry for the article. These links are
- ### created by appending a sharp (#) and the
- ### citation label to the BIBFILEURL value, which
- ### conforms with the practice of
- ### bibtex-to-html.awk.
- ###
- ### The HTML output form may be useful as a more
- ### compact representation of journal article
- ### bibliography data than the original BibTeX
- ### file provides. Of course, the
- ### table-of-contents format provides less
- ### information, and is considerably more
- ### troublesome for a computer program to parse.
- ###
- ### When URL key values are provided, they will
- ### be used to create hypertext links around
- ### article titles. This supports journals that
- ### provide article contents on the World-Wide
- ### Web.
- ###
- ### For parsing simplicity, this program requires
- ### that BibTeX
- ###
- ### key = "value"
- ###
- ### and
- ###
- ### @String{name = "value"}
- ###
- ### specifications be entirely contained on
- ### single lines, which is readily provided by
- ### the `bibclean -max-width 0' filter. It also
- ### requires that bibliography entries begin and
- ### end at the start of a line, and that
- ### quotation marks, rather than balanced braces,
- ### delimit string values. This is a
- ### conventional format that again can be
- ### guaranteed by bibclean.
- ###
- ### This program requires `new' awk, as described
- ### in the book
- ###
- ### Alfred V. Aho, Brian W. Kernighan, and
- ### Peter J. Weinberger,
- ### ``The AWK Programming Language'',
- ### Addison-Wesley (1988), ISBN
- ### 0-201-07981-X,
- ###
- ### such as provided by programs named (GNU)
- ### gawk, nawk, and recent AT&T awk.
- ###
- ### The checksum field above contains a CRC-16
- ### checksum as the first value, followed by the
- ### equivalent of the standard UNIX wc (word
- ### count) utility output of lines, words, and
- ### characters. This is produced by Robert
- ### Solovay's checksum utility.",
- ### }
- ### ====================================================================
# ----------------------------------------------------------------------
# Main program: one rule per kind of input line.  Rule order matters:
# the specific @String/@Preamble/@Article patterns must precede the
# catch-all /^ *@/ rule, and every rule ends in `next' so that at most
# one rule fires per input line.
# ----------------------------------------------------------------------

BEGIN {
    initialize()
}

# --- entry headers ---
/^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *{/ {
    do_String()
    next
}

/^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/ {
    next
}

/^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/ {
    do_Article()
    next
}

/^ *@/ {
    do_Other()
    next
}

# --- key = value lines ---
/^ *author *= *\"/ {
    do_author()
    next
}

/^ *journal *= */ {
    do_journal()
    next
}

/^ *volume *= *\"/ {
    do_volume()
    next
}

/^ *number *= *\"/ {
    do_number()
    next
}

/^ *year *= *\"/ {
    do_year()
    next
}

/^ *month *= */ {
    do_month()
    next
}

/^ *title *= *\"/ {
    do_title()
    next
}

/^ *pages *= *\"/ {
    do_pages()
    next
}

/^ *URL *= *\"/ {
    do_URL()
    next
}

# --- closing brace of an entry: emit the TOC line(s) for an article ---
/^ *} *$/ {
    if (In_Article)
        do_end_entry()
    next
}

END {
    terminate()
}
- ########################################################################
- # NB: The programming conventions for variables in this program are: #
- # UPPERCASE global constants and user options #
- # Initialuppercase global variables #
- # lowercase local variables #
- # Any deviation is an error! #
- ########################################################################
function do_Article()
{
    # Start of an @Article entry: forget every field collected for the
    # previous entry so that nothing leaks into this one.
    Author = Title = Journal = Volume = Number = Month = Year = Pages = Url = ""

    # The citation label is whatever follows the opening brace on the
    # current line, minus the trailing comma and surrounding blanks.
    Citation_label = $0
    sub(/^[^\{]*{/,"",Citation_label)
    sub(/ *, *$/,"",Citation_label)

    In_Article = 1
}
function do_author( raw)
{
    # Record the quoted author value of the current line, translated
    # from TeX markup by TeX_to_HTML().
    raw = get_value($0)
    Author = TeX_to_HTML(raw)
}
function do_end_entry( k,n,parts, page_field)
{
    # Emit the table-of-contents line(s) for the article just read:
    # one line per leading author, then the last author together with
    # the title and page number.
    n = split(Author,parts," and ")

    # A change of issue number starts a new issue heading.
    if (Last_number != Number)
        do_new_issue()

    # All authors but the last get a line of their own.
    k = 1
    while (k < n)
    {
        print_toc_line(parts[k] " and", "", "")
        ++k
    }

    # Hypertext wrappers for the title; print_toc_line() clears them
    # after first use.
    Title_prefix = html_begin_title()
    Title_suffix = html_end_title()

    page_field = html_begin_pages() Pages html_end_pages()
    if (html_length(Title) > (MAX_TITLE_CHARS + MIN_LEADERS))
        do_long_title(parts[n], Title, page_field) # split title over several lines
    else
        print_toc_line(parts[n], Title, page_field) # whole title fits on one line
}
function do_journal()
{
    # Record the journal name from either
    #     journal = "quoted journal name",
    # or
    #     journal = journal-abbreviation,
    # expanding the abbreviation when an @String definition was seen.
    if ($0 !~ /[=] *"/)
    {
        Journal = get_abbrev($0)
        if (Journal in String)
            Journal = String[Journal]
    }
    else
        Journal = get_value($0)

    gsub(/\\-/,"",Journal) # drop TeX discretionary hyphens
}
function do_long_title(author,title,pages, piece,cut)
{
    # Print a title that is too long for one line as several lines.
    # The author appears only on the first line and the page field
    # only on the last one.
    title = trim(title)
    for (; length(title) > 0; author = "")
    {
        cut = html_breakpoint(title,MAX_TITLE_CHARS+MIN_LEADERS)
        piece = substr(title,1,cut)
        title = substr(title,cut+1)
        sub(/^ +/,"",title) # the next line must not start with blanks
        if (length(title) == 0)
            print_toc_line(author, piece, pages) # final piece carries the pages
        else
            print_toc_line(author, piece, "")
    }
}
function do_month( k,n,parts, name)
{
    # Record the month, which may be quoted text, a standard
    # abbreviation (jan, feb, ...), or several of those joined with
    # BibTeX `#' concatenation around a slash or dashes.
    if ($0 ~ /[=] *"/)
        Month = get_value($0)
    else
        Month = get_abbrev($0)

    gsub(/[\"]/,"",Month)
    gsub(/ *# *\\slash *# */," / ",Month)
    gsub(/ *# *-+ *# */," / ",Month)

    # Expand each slash-separated component that is a known
    # abbreviation, then rejoin the components with " / ".
    n = split(Month,parts," */ *")
    Month = ""
    for (k = 1; k <= n; ++k)
    {
        name = parts[k]
        if (name in Month_expansion)
            name = Month_expansion[name]
        if (k > 1)
            Month = Month " / "
        Month = Month name
    }
}
function do_new_issue()
{
    # Emit the heading for a new journal issue and remember its number.
    Last_number = Number

    if (!HTML)
    {
        # Plain text: blank line, journal name, issue line, blank line.
        print_line("")
        print_line(Journal)
        print_line(strip_html(vol_no_month_year()))
        print_line("")
        return
    }

    # HTML: an extra break separates volumes.
    if (Last_volume != Volume)
    {
        Last_volume = Volume
        print_line(prefix(2) "<BR>")
    }

    # Close the previous issue's contents, then write the new heading.
    html_end_toc()
    html_begin_issue()
    print_line(prefix(2) Journal "<BR>")
    print_line(strip_html(vol_no_month_year()))
    html_end_issue()

    # Register the issue in the master index and open its contents.
    html_toc_entry()
    html_begin_toc()
}
function do_number( value)
{
    # Record the issue number from a `number = "..."' line.
    value = get_value($0)
    Number = value
}
function do_Other()
{
    # Any entry type other than @Article: its closing brace must not
    # produce a table-of-contents line.
    In_Article = 0
}
function do_pages( range)
{
    # Record the page range, dropping an unknown final page that is
    # written as `--??'.
    range = get_value($0)
    sub(/--[?][?]/,"",range)
    Pages = range
}
function do_String( name,value)
{
    # Remember an @String{name = "value"} definition in String[].
    # NB: $0 itself is reduced to the `name = "value"' part.
    sub(/^[^\{]*\{/,"",$0) # remove everything through the open brace
    sub(/\} *$/,"",$0) # remove the closing brace and trailing blanks
    name = get_key($0)
    value = get_value($0)
    String[name] = value
}
function do_title( raw)
{
    # Record the quoted title value of the current line, translated
    # from TeX markup by TeX_to_HTML().
    raw = get_value($0)
    Title = TeX_to_HTML(raw)
}
function do_URL( parts)
{
    # Record the article URL.  When several URLs are listed, separated
    # by commas or semicolons, only the first one is kept.
    split(get_value($0),parts,"[,;]")
    Url = trim(parts[1])
}
function do_volume( value)
{
    # Record the volume from a `volume = "..."' line.
    value = get_value($0)
    Volume = value
}
function do_year( value)
{
    # Record the year from a `year = "..."' line.
    value = get_value($0)
    Year = value
}
function get_abbrev(s, eq)
{
    # Return abbrev from ``key = abbrev,''.
    eq = index(s,"=")
    if (eq > 0)
    {
        s = substr(s,eq + 1) # keep only what follows the first equals sign
        sub(/^ */,"",s) # and skip the blanks in front of the value
    }
    sub(/ *,? *$/,"",s) # discard trailing optional whitespace,
                        # optional comma, and optional space
    return (s)
}
function get_key(s, eq)
{
    # Return key from ``key = "value",''.
    sub(/^ */,"",s) # discard leading space
    eq = index(s,"=")
    if (eq > 0)
    {
        s = substr(s,1,eq - 1) # discard everything from the equals sign on
        sub(/ +$/,"",s) # and the blanks that preceded it
    }
    return (s)
}
function get_value(s, quote)
{
    # Return value from ``key = "value",''.
    quote = index(s,"\"")
    if (quote > 0)
    {
        s = substr(s,quote + 1) # keep what follows the opening quote
        sub(/^ */,"",s) # minus any blanks that start the value
    }
    sub(/ *\",? *$/,"",s) # discard trailing optional whitespace, quote,
                          # optional comma, and optional space
    return (s)
}
function html_accents(s)
{
    # Return s with braced TeX accent groups such as {\"o} replaced by
    # the corresponding ISO Latin 1 SGML entities (&ouml;).
    #
    # The replacement strings are written "\\&name;": the gsub()
    # replacement text is then \&name;, in which \& stands for a
    # literal ampersand.  Braces in the patterns are escaped so that
    # they can never be read as interval expressions.
    if (index(s,"\\") > 0) # important optimization
    {
        # Convert common lower-case accented letters according to the
        # table on p. 169 of Peter Flynn's ``The World Wide Web
        # Handbook'', International Thomson Computer Press, 1995, ISBN
        # 1-85032-205-8. The official table of ISO Latin 1 SGML
        # entities used in HTML can be found in the file
        # /usr/local/lib/html-check/lib/ISOlat1.sgml (your path
        # may differ).
        gsub(/\{\\`a\}/,      "\\&agrave;", s)
        gsub(/\{\\'a\}/,      "\\&aacute;", s)
        gsub(/\{\\[\^]a\}/,   "\\&acirc;",  s)
        gsub(/\{\\~a\}/,      "\\&atilde;", s)
        gsub(/\{\\\"a\}/,     "\\&auml;",   s)
        gsub(/\{\\aa\}/,      "\\&aring;",  s)
        gsub(/\{\\ae\}/,      "\\&aelig;",  s)
        gsub(/\{\\c\{c\}\}/,  "\\&ccedil;", s)
        gsub(/\{\\`e\}/,      "\\&egrave;", s)
        gsub(/\{\\'e\}/,      "\\&eacute;", s)
        gsub(/\{\\[\^]e\}/,   "\\&ecirc;",  s)
        gsub(/\{\\\"e\}/,     "\\&euml;",   s)
        gsub(/\{\\`i\}/,      "\\&igrave;", s)
        gsub(/\{\\'i\}/,      "\\&iacute;", s)
        gsub(/\{\\[\^]i\}/,   "\\&icirc;",  s)
        gsub(/\{\\\"i\}/,     "\\&iuml;",   s)
        # ignore eth and thorn
        gsub(/\{\\~n\}/,      "\\&ntilde;", s)
        gsub(/\{\\`o\}/,      "\\&ograve;", s)
        gsub(/\{\\'o\}/,      "\\&oacute;", s)
        gsub(/\{\\[\^]o\}/,   "\\&ocirc;",  s)
        gsub(/\{\\~o\}/,      "\\&otilde;", s)
        gsub(/\{\\\"o\}/,     "\\&ouml;",   s)
        gsub(/\{\\o\}/,       "\\&oslash;", s)
        gsub(/\{\\`u\}/,      "\\&ugrave;", s)
        gsub(/\{\\'u\}/,      "\\&uacute;", s)
        gsub(/\{\\[\^]u\}/,   "\\&ucirc;",  s)
        gsub(/\{\\\"u\}/,     "\\&uuml;",   s)
        gsub(/\{\\'y\}/,      "\\&yacute;", s)
        gsub(/\{\\\"y\}/,     "\\&yuml;",   s)

        # Now do the same for upper-case accents
        gsub(/\{\\`A\}/,      "\\&Agrave;", s)
        gsub(/\{\\'A\}/,      "\\&Aacute;", s)
        gsub(/\{\\[\^]A\}/,   "\\&Acirc;",  s)
        gsub(/\{\\~A\}/,      "\\&Atilde;", s)
        gsub(/\{\\\"A\}/,     "\\&Auml;",   s)
        gsub(/\{\\AA\}/,      "\\&Aring;",  s)
        gsub(/\{\\AE\}/,      "\\&AElig;",  s)
        gsub(/\{\\c\{C\}\}/,  "\\&Ccedil;", s)
        gsub(/\{\\`E\}/,      "\\&Egrave;", s)
        gsub(/\{\\'E\}/,      "\\&Eacute;", s)
        gsub(/\{\\[\^]E\}/,   "\\&Ecirc;",  s)
        gsub(/\{\\\"E\}/,     "\\&Euml;",   s)
        gsub(/\{\\`I\}/,      "\\&Igrave;", s)
        gsub(/\{\\'I\}/,      "\\&Iacute;", s)
        gsub(/\{\\[\^]I\}/,   "\\&Icirc;",  s)
        gsub(/\{\\\"I\}/,     "\\&Iuml;",   s)
        # ignore eth and thorn
        gsub(/\{\\~N\}/,      "\\&Ntilde;", s)
        gsub(/\{\\`O\}/,      "\\&Ograve;", s)
        gsub(/\{\\'O\}/,      "\\&Oacute;", s)
        gsub(/\{\\[\^]O\}/,   "\\&Ocirc;",  s)
        gsub(/\{\\~O\}/,      "\\&Otilde;", s)
        gsub(/\{\\\"O\}/,     "\\&Ouml;",   s)
        gsub(/\{\\O\}/,       "\\&Oslash;", s)
        gsub(/\{\\`U\}/,      "\\&Ugrave;", s)
        gsub(/\{\\'U\}/,      "\\&Uacute;", s)
        gsub(/\{\\[\^]U\}/,   "\\&Ucirc;",  s)
        gsub(/\{\\\"U\}/,     "\\&Uuml;",   s)
        gsub(/\{\\'Y\}/,      "\\&Yacute;", s)
        gsub(/\{\\ss\}/,      "\\&szlig;",  s)

        # Others not mentioned in Flynn's book
        gsub(/\{\\'\\i\}/,    "\\&iacute;", s) # acute accent on dotless i
        gsub(/\{\\'\\j\}/,    "j",          s) # no entity for accented dotless j
    }
    return (s)
}
function html_begin_issue( anchor)
{
    # Open the heading of a new issue: a horizontal rule, then <H1>
    # and a named anchor that the master index links to.
    anchor = "<A NAME=\"" html_label() "\">"
    print_line("")
    print_line(prefix(2) "<HR>")
    print_line("")
    print_line(prefix(2) "<H1>")
    print_line(prefix(3) anchor)
}
function html_begin_pages()
{
    # Return the opening anchor that links a page number to the
    # article's BibTeX entry, or "" when no link was requested.
    if (HTML && (BIBFILEURL != ""))
        return ("<A HREF=\"" BIBFILEURL "#" Citation_label "\">")
    return ("")
}
function html_begin_pre()
{
    # Enter preformatted text.  While In_PRE is set, prefix() returns
    # no indentation.
    In_PRE = 1
    print_line("<PRE>")
}
function html_begin_title()
{
    # Return the opening anchor that links the title to the article's
    # URL, or "" when there is no URL or HTML output is off.
    if (HTML && (Url != ""))
        return ("<A HREF=\"" Url "\">")
    return ("")
}
function html_begin_toc()
{
    # Start the contents of an issue: close any contents block that is
    # still open, then open a fresh preformatted block.
    html_end_toc()
    html_begin_pre()
}
function html_body( k)
{
    # Print the buffered body lines in the order they were stored.
    k = 0
    while (++k <= BodyLines)
        print Body[k]
}
function html_breakpoint(title,maxlength, break_after,k, start,total)
{
    # Return the largest character position in title AFTER which the
    # title can be broken across lines without exceeding maxlength
    # visible characters.  Returns length(title) when the title fits,
    # or when no blank to break at can be found.
    total = length(title)
    if (html_length(title) <= maxlength) # title fits on one line
        return (total)

    # HTML markup makes the raw position larger than the visible
    # length, so begin at the earliest position that could be the
    # answer and scan forward, calling the expensive html_length()
    # only at blanks.
    start = min(maxlength,total)
    break_after = 0
    k = start
    while (k < total)
    {
        if (substr(title,k+1,1) == " ") # could break after position k
        {
            if (html_length(substr(title,1,k)) > maxlength)
                break # advanced too far: keep the last good break
            break_after = k
        }
        ++k
    }
    if (break_after != 0)
        return (break_after)

    # The forward scan found nothing, so scan backward for a blank.
    k = start - 1
    while ((k > 0) && (substr(title,k+1,1) != " "))
        --k
    return ((k < 1) ? total : k) # no blank at all: print entire string
}
function html_end_issue( closers,k)
{
    # Close the anchor and heading opened by html_begin_issue(),
    # innermost element first.
    closers[1] = prefix(3) "</A>"
    closers[2] = prefix(2) "</H1>"
    for (k = 1; k <= 2; ++k)
        print_line(closers[k])
}
function html_end_pages()
{
    # Return the closing tag that matches html_begin_pages().
    if (HTML && (BIBFILEURL != ""))
        return ("</A>")
    return ("")
}
function html_end_pre()
{
    # Leave preformatted text, if we are inside it; otherwise do
    # nothing.
    if (!In_PRE)
        return
    print_line("</PRE>")
    In_PRE = 0
}
function html_end_title()
{
    # Return the closing tag that matches html_begin_title().
    if (HTML && (Url != ""))
        return ("</A>")
    return ("")
}
function html_end_toc()
{
    # The contents of an issue are one preformatted block, so ending
    # the contents means ending that block.
    html_end_pre()
}
function html_fonts(s, arg,control_word,k,level,n,open_brace,arg_end)
{
    # Return s with TeX font changes translated to HTML markup.  Two
    # forms are recognized:
    #     {\xxx text}   declarations listed in Font_decl_map
    #     \xxx{text}    commands listed in Font_cmd_map
    # A map value of "toupper" upper-cases the text, "" drops the
    # markup, and anything else is used as the HTML tag name.
    #
    # Brace groups that are not font changes (for example the
    # protective braces in ``{B}ib{\TeX}'') are left alone, and the
    # scan continues after them so that later font changes in the same
    # string are still translated.
    open_brace = index(s,"{")
    if (open_brace == 0) # important optimization
        return (s)

    # Find the end of the group that starts at open_brace.
    level = 1
    for (k = open_brace + 1; (level != 0) && (k <= length(s)); ++k)
    {
        if (substr(s,k,1) == "{")
            level++
        else if (substr(s,k,1) == "}")
            level--
    }
    # {...} is now found at open_brace ... (k-1).  arg_end is the first
    # position that is NOT part of the argument: the closing brace when
    # the group is balanced, otherwise one past the end of s.
    arg_end = (level == 0) ? (k - 1) : k

    for (control_word in Font_decl_map) # look for {\xxx ...}
    {
        if (substr(s,open_brace+1,length(control_word)+1) ~ \
            ("\\" control_word "[^A-Za-z]"))
        {
            n = open_brace + 1 + length(control_word)
            arg = trim(substr(s,n,arg_end - n))
            arg = html_fonts_apply(arg,Font_decl_map[control_word])
            return (substr(s,1,open_brace-1) arg html_fonts(substr(s,k)))
        }
    }

    for (control_word in Font_cmd_map) # look for \xxx{...}
    {
        if (substr(s,open_brace - length(control_word),length(control_word)) ~ \
            ("\\" control_word))
        {
            n = open_brace + 1
            arg = trim(substr(s,n,arg_end - n))
            arg = html_fonts_apply(arg,Font_cmd_map[control_word])
            n = open_brace - length(control_word) - 1 # last char before \xxx
            return (substr(s,1,n) arg html_fonts(substr(s,k)))
        }
    }

    # Not a font change: keep this brace and look at what follows it.
    return (substr(s,1,open_brace) html_fonts(substr(s,open_brace+1)))
}

function html_fonts_apply(arg,markup)
{
    # Private helper of html_fonts(): apply one font-map value to arg.
    if (markup == "toupper") # arg -> ARG
        return (toupper(arg))
    if (markup != "") # arg -> <TAG>arg</TAG>
        return ("<" markup ">" arg "</" markup ">")
    return (arg)
}
function html_header( cmd)
{
    # Print the HTML preamble: identifying comments, DOCTYPE, <HEAD>
    # with the journal name as title, and the opening <BODY> tag.
    #
    # Sets the globals USER, HOSTNAME, DATE and PERSONAL_NAME from the
    # environment and from the output of external commands.  Each
    # command pipe is closed after its single line has been read.
    USER = ENVIRON["USER"]
    if (USER == "")
        USER = ENVIRON["LOGNAME"]
    if (USER == "")
        USER = "????"

    "hostname" | getline HOSTNAME
    close("hostname")
    "date" | getline DATE
    close("date")

    # USER comes from the environment and is pasted into shell command
    # lines, so look up the personal name only when USER consists of
    # ordinary login-name characters.
    if (USER ~ /^[A-Za-z0-9_.-]+$/)
    {
        cmd = "ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'"
        cmd | getline PERSONAL_NAME
        close(cmd)
        if (PERSONAL_NAME == "")
        {
            cmd = "grep '^" USER ":' /etc/passwd | awk -F: '{print $5}'"
            cmd | getline PERSONAL_NAME
            close(cmd)
        }
    }

    print "<!-- WARNING: Do NOT edit this file. It was converted from -->"
    print "<!-- BibTeX format to HTML by journal-toc.awk version " VERSION_NUMBER " " VERSION_DATE " -->"
    print "<!-- on " DATE " -->"
    print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->"
    print ""
    print ""
    print "<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">"
    print ""
    print "<HTML>"
    print prefix(1) "<HEAD>"
    print prefix(2) "<TITLE>"
    print prefix(3) Journal
    print prefix(2) "</TITLE>"
    print prefix(2) "<LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"
    print prefix(1) "</HEAD>"
    print ""
    print prefix(1) "<BODY>"
}
function html_label( anchor)
{
    # Return the anchor name for the current issue, built from volume,
    # number, month and year.  Every character outside letters, digits
    # and ( ) : , ; . / - is removed, which also removes blanks.
    anchor = Volume "(" Number "):" Month ":" Year
    gsub(/[^A-Za-z0-9():,;.\/\-]/,"",anchor)
    return (anchor)
}
function html_length(s)
{
    # Return the visible length of s.  In HTML mode, tags occupy no
    # space on the page, and a character entity such as &eacute; or
    # &#233; is displayed as a single character, so each entity counts
    # as one.  In plain-text mode this is just length(s).
    if (HTML)
    {
        gsub(/<\/?[^>]*>/,"",s) # remove SGML tags
        gsub(/&#?[A-Za-z0-9]+;/,"x",s) # one visible character per SGML entity
    }
    return (length(s))
}
function html_toc( heading)
{
    # Print the master index: a heading followed by the links
    # accumulated in HTML_TOC, one per issue.
    heading = "Table of contents for issues of " Journal
    print prefix(2) "<H1>"
    print prefix(3) heading
    print prefix(2) "</H1>"
    print HTML_TOC
}
function html_toc_entry( link)
{
    # Append one line to the master index: a link to the current
    # issue's anchor, labelled with its volume/number/month/year.
    link = " <A HREF=\"#" html_label() "\">" vol_no_month_year() "</A><BR>" "\n"
    HTML_TOC = HTML_TOC link
}
function html_trailer()
{
    # Finish the document: close a preformatted block that is still
    # open, then close <BODY> and <HTML>.
    html_end_pre()
    print prefix(1) "</BODY>"
    print "</HTML>"
}
function initialize()
{
    # Set the global constants, normalize the user options HTML and
    # INDENT, and fill the lookup tables Month_expansion, Font_cmd_map
    # and Font_decl_map.

    # NB: Update these when the program changes
    VERSION_DATE = "[09-Oct-1996]"
    VERSION_NUMBER = "1.00"

    HTML = (HTML == "") ? 0 : (0 + HTML)
    if (INDENT == "")
        INDENT = 4
    if (HTML == 0)
        INDENT = 0 # indentation suppressed in ASCII mode

    LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ."
    MAX_TITLE_CHARS = 36 # 36 produces a 79-char output line when there is
                         # just an initial page number. If this is
                         # increased, the LEADERS string may need to be
                         # lengthened.
    MIN_LEADERS = 4 # Minimum number of characters from LEADERS
                    # required when leaders are used. The total
                    # number of characters that can appear in a
                    # title line is MAX_TITLE_CHARS + MIN_LEADERS.
                    # Leaders are omitted when the title length is
                    # between MAX_TITLE_CHARS and this sum.
    MIN_LEADERS_SPACE = "    " # must be at least MIN_LEADERS characters long

    Month_expansion["jan"] = "January"
    Month_expansion["feb"] = "February"
    Month_expansion["mar"] = "March"
    Month_expansion["apr"] = "April"
    Month_expansion["may"] = "May"
    Month_expansion["jun"] = "June"
    Month_expansion["jul"] = "July"
    Month_expansion["aug"] = "August"
    Month_expansion["sep"] = "September"
    Month_expansion["oct"] = "October"
    Month_expansion["nov"] = "November"
    Month_expansion["dec"] = "December"

    # TeX font commands of the form \xxx{...}.  The value is the HTML
    # tag name, "" for no markup, or "toupper" for upper-casing.
    Font_cmd_map["\\emph"] = "EM"
    Font_cmd_map["\\textbf"] = "B"
    Font_cmd_map["\\textit"] = "I"
    Font_cmd_map["\\textmd"] = ""
    Font_cmd_map["\\textrm"] = ""
    Font_cmd_map["\\textsc"] = "toupper"
    Font_cmd_map["\\textsl"] = "I"
    Font_cmd_map["\\texttt"] = "TT" # same tag as \tt and \ttfamily below
    Font_cmd_map["\\textup"] = ""

    # TeX font declarations of the form {\xxx ...}, same value rules.
    Font_decl_map["\\bf"] = "B"
    Font_decl_map["\\em"] = "EM"
    Font_decl_map["\\it"] = "I"
    Font_decl_map["\\rm"] = ""
    Font_decl_map["\\sc"] = "toupper"
    Font_decl_map["\\sf"] = ""
    Font_decl_map["\\tt"] = "TT"
    Font_decl_map["\\itshape"] = "I"
    Font_decl_map["\\upshape"] = ""
    Font_decl_map["\\slshape"] = "I"
    Font_decl_map["\\scshape"] = "toupper"
    Font_decl_map["\\mdseries"] = ""
    Font_decl_map["\\bfseries"] = "B"
    Font_decl_map["\\rmfamily"] = ""
    Font_decl_map["\\sffamily"] = ""
    Font_decl_map["\\ttfamily"] = "TT"
}
function min(a,b)
{
    # Return the smaller of a and b (b when they compare equal).
    if (a < b)
        return a
    return b
}
function prefix(level)
{
    # Return the indentation for nesting depth `level': INDENT blanks
    # per level, up to 60 blanks in all.  Inside preformatted text
    # (In_PRE set) no indentation is returned.
    if (In_PRE)
        return ("")
    # sprintf("%60s","") yields exactly 60 blanks; a string literal of
    # that length is easy to damage without noticing.
    return (substr(sprintf("%60s", ""), 1, INDENT * level))
}
function print_line(line)
{
    # Plain-text output goes straight to standard output.  HTML output
    # must be buffered in Body[] so that the table of contents can be
    # accumulated before the body is printed.
    if (!HTML)
        print line
    else
        Body[++BodyLines] = line
}
function print_toc_line(author,title,pages, extra,leaders,n,t, fill,width)
{
    # Format and emit one table-of-contents line.
    #
    # When we have a multiline title, the hypertext link goes only
    # on the first line. A multiline hypertext link looks awful
    # because of long underlines under the leading indentation.
    if (pages != "") # last title line, with page number
    {
        n = html_length(title) # potentially expensive
        extra = n % 2 # extra space for aligned leader dots

        # Dotted leaders when the title is short enough, otherwise
        # plain blanks; either way the title field is padded to
        # MAX_TITLE_CHARS + MIN_LEADERS visible characters.
        fill = (n <= MAX_TITLE_CHARS) ? LEADERS : MIN_LEADERS_SPACE
        width = MAX_TITLE_CHARS + MIN_LEADERS - extra - n
        leaders = substr(fill, 1, width)

        t = sprintf("%31s %s%s%s%s%s %4s", \
            author, Title_prefix, title, Title_suffix, \
            (extra ? " " : ""), leaders, pages)
    }
    else # no leaders needed in title lines other than last one
        t = sprintf("%31s %s%s%s", author, Title_prefix, title, Title_suffix)

    # The hypertext link material is used once only.
    Title_prefix = ""
    Title_suffix = ""

    # Efficiency note: an earlier version accumulated the body in a
    # single scalar like this: "Body = Body t". Profiling revealed
    # this statement as the major hot spot, and the change to array
    # storage made the program more than twice as fast.
    if (!HTML)
        print t
    else
        Body[++BodyLines] = t
}
function protect_SGML_characters(s)
{
    # Return s with the four SGML reserved characters & < > " replaced
    # by their entities.  In a gsub() replacement string a bare &
    # stands for the matched text, so each literal ampersand is written
    # \& (spelled "\\&" inside an awk string).
    gsub(/&/,"\\&amp;",s) # NB: this one MUST be first
    gsub(/</,"\\&lt;",s)
    gsub(/>/,"\\&gt;",s)
    gsub(/\"/,"\\&quot;",s)
    return (s)
}
function strip_braces(s, k)
{
    # Return s without its non-backslashed braces: open braces are
    # stripped first, then close braces.
    s = strip_char(s,"{")
    s = strip_char(s,"}")
    return (s)
}
function strip_char(s,c, k,out)
{
    # Return s with every non-backslashed instance of c removed.
    # An instance of c that directly follows a backslash is kept.
    out = ""
    while ((k = index(s,c)) > 0)
    {
        if ((k > 1) && (substr(s,k-1,1) == "\\"))
            out = out substr(s,1,k) # backslashed: keep it
        else
            out = out substr(s,1,k-1) # ordinary: drop it
        # Continue with the text after this instance.  The character
        # in front of the remainder is c itself, never a backslash, so
        # an instance at position 1 is correctly treated as ordinary.
        s = substr(s,k+1)
    }
    return (out s)
}
function strip_html(s, plain)
{
    # Return s with every opening and closing tag removed.
    plain = s
    gsub(/<\/?[^>]*>/,"",plain)
    return (plain)
}
function terminate()
{
    # End of input.  Plain-text output has already been printed line
    # by line, so only the HTML document remains to be written out.
    if (!HTML)
        return

    html_end_pre()
    HTML = 0 # NB: stop line buffering
    html_header()
    html_toc()
    html_body()
    html_trailer()
}
function TeX_to_HTML(s, k,n,parts)
{
    # Return the TeX string s translated for output.  Text outside
    # math mode goes through TeX_to_HTML_nonmath() and has its braces
    # stripped; text between dollar signs goes through
    # TeX_to_HTML_math() and is otherwise left intact.

    # First convert SGML reserved characters to SGML entities.  The
    # fourth one, the ampersand, is handled by the math and non-math
    # helpers, which see it as the TeX control symbol \&.
    if (HTML)
    {
        gsub(/>/, "\\&gt;", s)
        gsub(/</, "\\&lt;", s)
        gsub(/"/, "\\&quot;", s)
    }

    # Display math $$...$$ becomes $$$...$$$, so that after splitting
    # at single dollars its content lands in an even-numbered (math)
    # part, just like the content of inline math $...$.
    gsub(/[$][$]/,"$$$",s)
    n = split(s,parts,/[$]/) # non-math (odd) and math (even) parts
    s = ""
    for (k = 1; k <= n; ++k) # unbrace non-math part, leaving math mode intact
        s = s ((k > 1) ? "$" : "") \
            ((k % 2) ? strip_braces(TeX_to_HTML_nonmath(parts[k])) : \
            TeX_to_HTML_math(parts[k]))
    gsub(/[$][$][$]/,"$$",s) # restore display math
    return (s)
}
function TeX_to_HTML_math(s)
{
    # Translate the math-mode text s.
    # Mostly a dummy for now, but HTML 3 could support some math translation
    if (HTML)
        gsub(/\\&/,"\\&amp;",s) # reduce TeX ampersands to SGML entities
    else
        gsub(/\\&/,"\\&",s) # plain text: reduce them to conventional ones
    return (s)
}
function TeX_to_HTML_nonmath(s)
{
    # Translate the TeX markup in the non-math text s: to HTML when
    # HTML output is selected, otherwise to plain ASCII text.
    if (index(s,"\\") > 0) # important optimization
    {
        gsub(/\\slash +/,"/",s) # replace TeX slashes with conventional ones
        gsub(/ *\\emdash +/," --- ",s) # replace BibNet emdashes with conventional ones
        gsub(/\\%/,"%",s) # reduce TeX percents to conventional ones
        gsub(/\\[$]/,"$",s) # reduce TeX dollars to conventional ones
        gsub(/\\#/,"#",s) # reduce TeX sharps to conventional ones
        if (HTML) # translate TeX markup to HTML
        {
            # "\\&amp;" is the replacement text \&amp;, where \& stands
            # for a literal ampersand.
            gsub(/\\&/,"\\&amp;",s) # reduce TeX ampersands to SGML entities
            s = html_accents(s)
            s = html_fonts(s)
        }
        else # plain ASCII text output: discard all TeX markup
        {
            gsub(/\\\&/, "\\&", s) # reduce TeX ampersands to conventional ones
            gsub(/\\[a-z][a-z] +/,"",s) # remove TeX font changes
            gsub(/\\[^A-Za-z]/,"",s) # remove remaining TeX control symbols
        }
    }
    return (s)
}
function trim(s)
{
    # Return s without leading and trailing blanks and tabs.  Each
    # pattern is anchored and can match only once, so sub() suffices.
    sub(/^[ \t]+/,"",s)
    sub(/[ \t]+$/,"",s)
    return (s)
}
function vol_no_month_year( text)
{
    # Return the issue line "Volume v, Number n, month, year", with
    # each value emphasized by wrap().
    text = "Volume " wrap(Volume)
    text = text ", Number " wrap(Number)
    text = text ", " wrap(Month)
    text = text ", " wrap(Year)
    return (text)
}
function wrap(value)
{
    # Return value emphasized with <STRONG> for HTML output, and
    # unchanged for plain-text output.
    if (HTML)
        return ("<STRONG>" value "</STRONG>")
    return (value)
}