PageRenderTime 30ms CodeModel.GetById 22ms app.highlight 5ms RepoModel.GetById 1ms app.codeStats 0ms

/www/tags/NOV_07_2009/htdocs/42docs/users-guide/regexps.xml

#
XML | 127 lines | 123 code | 1 blank | 3 comment | 0 complexity | c65f8e6d59d3f65c31c3ca80f857e8c2 MD5 | raw file
  1<!-- jEdit buffer-local properties: -->
  2<!-- :indentSize=1:noTabs=yes: -->
  3<!-- :xml.root=users-guide.xml: -->
  4
  5<appendix id="regexps"><title>Regular Expressions</title>
  6 <para>
  7  jEdit uses regular expressions to implement inexact search and replace.
  8  A regular expression consists of a string where some
  9  characters are given special meaning with regard to pattern matching.
 10 </para>
 11 <para>
 12  Within a regular expression, the following characters have special meaning:
 13 </para>
 14 <bridgehead renderas="sect3">Positional Operators</bridgehead>
 15 <itemizedlist>
 16  <listitem><para><literal>^</literal> matches at the beginning of a line</para></listitem>
 17  <listitem><para><literal>$</literal> matches at the end of a line</para></listitem>
 18  <listitem><para><literal>\b</literal> matches at a word break</para></listitem>
 19  <listitem><para><literal>\B</literal> matches at a non-word break</para></listitem>
 20  <listitem><para><literal>\&lt;</literal> matches at the start of a word</para></listitem>
 21  <listitem><para><literal>\&gt;</literal> matches at the end of a word</para></listitem>
 22 </itemizedlist>
 23 <bridgehead renderas="sect3">One-Character Operators</bridgehead>
 24 <itemizedlist>
 25  <listitem><para><literal>.</literal> matches any single character</para></listitem>
 26  <listitem><para><literal>\d</literal> matches any decimal digit</para></listitem>
 27  <listitem><para><literal>\D</literal> matches any non-digit</para></listitem>
 28  <listitem><para><literal>\n</literal> matches the newline character</para></listitem>
 29  <listitem><para><literal>\s</literal> matches any whitespace character</para></listitem>
 30  <listitem><para><literal>\S</literal> matches any non-whitespace character</para></listitem>
 31  <listitem><para><literal>\t</literal> matches a horizontal tab character</para></listitem>
 32  <listitem><para><literal>\w</literal> matches any word (alphanumeric) character</para></listitem>
 33  <listitem><para><literal>\W</literal> matches any non-word (non-alphanumeric)
 34  character</para></listitem>
 35  <listitem><para><literal>\\</literal> matches the backslash
 36  (<quote>\</quote>) character</para></listitem>
 37 </itemizedlist>
 38 <bridgehead renderas="sect3">Character Class Operator</bridgehead>
 39 <itemizedlist>
 40  <listitem><para><literal>[<replaceable>abc</replaceable>]</literal> matches
 41  any character in
 42  the set <replaceable>a</replaceable>, <replaceable>b</replaceable> or
 43  <replaceable>c</replaceable></para></listitem>
 44  <listitem><para><literal>[^<replaceable>abc</replaceable>]</literal> matches
 45  any character not
 46  in the set <replaceable>a</replaceable>, <replaceable>b</replaceable> or
 47  <replaceable>c</replaceable></para></listitem>
 48  <listitem><para><literal>[<replaceable>a-z</replaceable>]</literal> matches
 49  any character in the
 50  range <replaceable>a</replaceable> to <replaceable>z</replaceable>, inclusive.
 51  A leading or trailing dash will be interpreted literally</para></listitem>
 52  <listitem><para><literal>[[:alnum:]]</literal> matches any alphanumeric
 53  character</para></listitem>
 54  <listitem><para><literal>[[:alpha:]]</literal> matches any alphabetical character</para></listitem>
 55  <listitem><para><literal>[[:blank:]]</literal> matches a space or horizontal tab</para></listitem>
 56  <listitem><para><literal>[[:cntrl:]]</literal> matches a control character</para></listitem>
 57  <listitem><para><literal>[[:digit:]]</literal> matches a decimal digit</para></listitem>
 58  <listitem><para><literal>[[:graph:]]</literal> matches a non-space, non-control character</para></listitem>
 59  <listitem><para><literal>[[:lower:]]</literal> matches a lowercase letter</para></listitem>
 60  <listitem><para><literal>[[:print:]]</literal> same as <literal>[[:graph:]]</literal>, but also space and tab</para></listitem>
 61  <listitem><para><literal>[[:punct:]]</literal> matches a punctuation character</para></listitem>
 62  <listitem><para><literal>[[:space:]]</literal> matches any whitespace character, including newlines</para></listitem>
 63  <listitem><para><literal>[[:upper:]]</literal> matches an uppercase letter</para></listitem>
 64  <listitem><para><literal>[[:xdigit:]]</literal> matches a valid hexadecimal digit</para></listitem>
 65 </itemizedlist>
 66 <bridgehead renderas="sect3">Subexpressions and Backreferences</bridgehead>
 67 <itemizedlist>
 68  <listitem><para><literal>(<replaceable>abc</replaceable>)</literal> matches
 69  whatever the expression
 70  <replaceable>abc</replaceable> would match, and saves it as a subexpression.
 71  Also used for grouping</para></listitem>
 72  <listitem><para><literal>(?:<replaceable>...</replaceable>)</literal> pure
 73  grouping operator, does not
 74  save contents</para></listitem>
 75  <listitem><para><literal>(?#<replaceable>...</replaceable>)</literal> embedded
 76  comment, ignored by engine</para></listitem>
 77  <listitem><para><literal>(?=<replaceable>...</replaceable>)</literal> positive
 78  lookahead; the regular expression will match if the text in the parentheses
 79  matches, but that text will not be considered part of the match</para></listitem>
 80  <listitem><para><literal>(?!<replaceable>...</replaceable>)</literal> negative
 81  lookahead; the regular expression will match if the text in the parentheses
 82  does not
 83  match, and that text will not be considered part of the match</para></listitem>
 84  <listitem><para><literal>\<replaceable>n</replaceable></literal> where 0 &lt;
 85  <replaceable>n</replaceable> &lt; 10,
 86  matches the same thing the <replaceable>n</replaceable>th
 87  subexpression matched. Can only be used in the search string</para></listitem>
 88  <listitem><para><literal>$<replaceable>n</replaceable></literal> where 0 &lt;
 89  <replaceable>n</replaceable> &lt; 10,
 90  substituted with the text matched by the <replaceable>n</replaceable>th
 91  subexpression. Can only be used in the replacement string</para></listitem>
 92 </itemizedlist>
 93 <bridgehead renderas="sect3">Branching (Alternation) Operator</bridgehead>
 94 <itemizedlist>
 95  <listitem><para><literal><replaceable>a</replaceable>|<replaceable>b</replaceable></literal>
 96  matches whatever the expression <replaceable>a</replaceable> would match, or whatever
 97  the expression <replaceable>b</replaceable> would match</para></listitem>
 98 </itemizedlist>
 99 <bridgehead renderas="sect3">Repeating Operators</bridgehead>
100 <para>
101  These symbols operate on the previous atomic expression.
102 </para>
103 <itemizedlist>
104  <listitem><para><literal>?</literal> matches the preceding expression or the
105  null string</para></listitem>
106  <listitem><para><literal>*</literal> matches the null string or any number of repetitions
107  of the preceding expression</para></listitem>
108  <listitem><para><literal>+</literal> matches one or more repetitions of the preceding
109  expression</para></listitem>
110  <listitem><para><literal>{<replaceable>m</replaceable>}</literal> matches exactly
111  <replaceable>m</replaceable>
112  repetitions of the preceding expression</para></listitem>
113  <listitem><para><literal>{<replaceable>m</replaceable>,<replaceable>n</replaceable>}</literal>
114  matches between
115  <replaceable>m</replaceable> and <replaceable>n</replaceable> repetitions of the preceding
116  expression, inclusive</para></listitem>
117  <listitem><para><literal>{<replaceable>m</replaceable>,}</literal> matches
118  <replaceable>m</replaceable> or more
119  repetitions of the preceding expression</para></listitem>
120 </itemizedlist>
121 <bridgehead renderas="sect3">Stingy (Minimal) Matching</bridgehead>
122 <para>
123  If a repeating operator (above) is immediately followed by a
124  <literal>?</literal>, the repeating operator will stop at the smallest
125  number of repetitions that can complete the rest of the match.
126 </para>
127</appendix>