/mcs/class/referencesource/System.Web/Security/AntiXss/HtmlParameterEncoder.cs
C# | 239 lines | 106 code | 42 blank | 91 comment | 20 complexity | ef185bd209d487409dded2627f14f8c0 MD5 | raw file
Possible License(s): LGPL-2.0, MPL-2.0-no-copyleft-exception, CC-BY-SA-3.0, GPL-2.0
1//------------------------------------------------------------------------------
2// <copyright file="HtmlParameterEncoder.cs" company="Microsoft">
3// Copyright (c) Microsoft Corporation. All rights reserved.
4// </copyright>
5//------------------------------------------------------------------------------
6
7namespace System.Web.Security.AntiXss {
8 using System;
9 using System.Collections;
10 using System.Text;
11 using System.Threading;
12
/// <summary>
/// Identifies what a value is being encoded for; the encoder uses this to decide
/// how a space character is written.
/// </summary>
internal enum EncodingType {
    /// <summary>
    /// The value is destined for a query string (a space is written as "%20").
    /// </summary>
    QueryString = 1,

    /// <summary>
    /// The value is destined for form data (a space is written as "+").
    /// </summary>
    HtmlForm = 2
}
27
28 /// <summary>
29 /// Provides Html Parameter Encoding methods.
30 /// </summary>
31 internal static class HtmlParameterEncoder {
32
/// <summary>
/// The replacement text for a space when encoding for query strings.
/// </summary>
private static readonly char[] QueryStringSpace = "%20".ToCharArray();

/// <summary>
/// The replacement text for a space when encoding form data.
/// </summary>
private static readonly char[] FormStringSpace = "+".ToCharArray();

/// <summary>
/// Lazily built table, indexed by byte value, holding the text to output for each byte
/// when encoding query string and form parameters.
/// </summary>
/// <remarks>
/// The field is readonly, like the other static fields of this class: the lazy wrapper
/// is created once and never replaced.
/// </remarks>
private static readonly Lazy<char[][]> characterValuesLazy = new Lazy<char[][]>(InitialiseSafeList);
47
/// <summary>
/// Encodes a string for query string encoding and returns the encoded string.
/// </summary>
/// <param name="s">The text to URL-encode.</param>
/// <param name="encoding">The encoding used to convert the text to bytes before it is escaped.</param>
/// <returns>The URL-encoded text. A null or empty <paramref name="s"/> is returned unchanged.</returns>
/// <remarks>URL encoding ensures that all browsers will correctly transmit text in URL strings.
/// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers.
/// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser
/// in a request string. A space is written as "%20".</remarks>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is neither null nor empty.</exception>
internal static string QueryStringParameterEncode(string s, Encoding encoding) {
    return FormQueryEncode(s, encoding, EncodingType.QueryString);
}
62
/// <summary>
/// Encodes a string for form URL encoding and returns the encoded string.
/// </summary>
/// <param name="s">The text to URL-encode.</param>
/// <param name="encoding">The encoding used to convert the text to bytes before it is escaped.</param>
/// <returns>The URL-encoded text. A null or empty <paramref name="s"/> is returned unchanged.</returns>
/// <remarks>URL encoding ensures that all browsers will correctly transmit text in URL strings.
/// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers.
/// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser
/// in a request string. A space is written as "+".</remarks>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is neither null nor empty.</exception>
internal static string FormStringParameterEncode(string s, Encoding encoding) {
    return FormQueryEncode(s, encoding, EncodingType.HtmlForm);
}
77
/// <summary>
/// Encodes a string for query string or form data use, with the parameter
/// character table of this class.
/// </summary>
/// <param name="s">The text to URL-encode.</param>
/// <param name="encoding">The encoding for the text parameter.</param>
/// <param name="encodingType">Selects how a space is written.</param>
/// <returns>The encoded text.</returns>
private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType) {
    // Delegate to the table-driven overload, supplying the parameter table.
    Lazy<char[][]> parameterTable = characterValuesLazy;
    return FormQueryEncode(s, encoding, encodingType, parameterTable);
}
88
/// <summary>
/// Encodes a string by converting it to bytes and replacing every byte that the
/// supplied table marks for encoding with that byte's replacement text.
/// </summary>
/// <param name="s">The text to encode.</param>
/// <param name="encoding">The encoding used to convert the text to bytes before it is escaped.</param>
/// <param name="encodingType">Selects how a space (0x20) is written: "%20" for query strings, "+" for form data.</param>
/// <param name="characterValuesLazy">Lazily created table indexed by byte value. A null entry means the byte
/// is written unchanged; a non-null entry is the text written in its place.</param>
/// <returns>The encoded text. A null or empty <paramref name="s"/> is returned unchanged.</returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is neither null nor empty.</exception>
/// <exception cref="ArgumentOutOfRangeException">Thrown if the input contains a space and <paramref name="encodingType"/> is not a known value.</exception>
private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType, Lazy<char[][]> characterValuesLazy) {
    if (string.IsNullOrEmpty(s)) {
        return s;
    }

    if (encoding == null) {
        throw new ArgumentNullException("encoding");
    }

    char[][] characterValues = characterValuesLazy.Value;

    // RFC 3986 (http://tools.ietf.org/html/rfc3986) percent-encodes the bytes of the text,
    // not its characters. The byte values depend on the caller-supplied encoding.
    byte[] inputBytes = encoding.GetBytes(s.ToCharArray());

    // Each byte is normally written as at most three characters ("%xx"), so this capacity
    // avoids regrowth; the builder still grows safely if a table entry is longer.
    StringBuilder output = new StringBuilder(inputBytes.Length * 3);

    for (int position = 0; position < inputBytes.Length; position++) {
        byte current = inputBytes[position];

        // A space is always replaced, whatever the table holds for it.
        if (current == 0x20) {
            output.Append(GetSpaceReplacement(encodingType));
            continue;
        }

        // Only index the table when the byte is inside it. The previous guard used '>'
        // and then indexed the table anyway, which could only end in an
        // IndexOutOfRangeException for a table shorter than 256 entries.
        bool inTable = current < characterValues.Length;
        char[] replacement = inTable ? characterValues[current] : null;

        if (replacement != null) {
            output.Append(replacement);
        }
        else if (current == 0x00 || !inTable) {
            // A null byte and bytes the table does not cover must never be written raw.
            output.Append(PercentEncodeByte(current));
        }
        else {
            // The table marks this byte as safe, so it is written unchanged.
            output.Append((char)current);
        }
    }

    return output.ToString();
}

/// <summary>
/// Returns the text that replaces a space for the given encoding type.
/// </summary>
/// <param name="encodingType">The encoding type in use.</param>
/// <returns>The replacement characters for a space.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="encodingType"/> is not a known value.</exception>
private static char[] GetSpaceReplacement(EncodingType encodingType) {
    switch (encodingType) {
        case EncodingType.QueryString:
            return QueryStringSpace;

        // Special case for Html Form data, from http://www.w3.org/TR/html401/appendix/notes.html#non-ascii-chars
        case EncodingType.HtmlForm:
            return FormStringSpace;

        default:
            throw new ArgumentOutOfRangeException("encodingType");
    }
}

/// <summary>
/// Builds the "%XX" form of a byte that has no usable table entry.
/// </summary>
/// <param name="value">The byte to encode.</param>
/// <returns>A three-character array: a percent sign followed by two hexadecimal digits.</returns>
private static char[] PercentEncodeByte(byte value) {
    // NOTE(review): the hex casing used by the table generator is not visible from this file;
    // uppercase follows the RFC 3986 section 2.1 recommendation. Confirm it matches the table.
    const string HexDigits = "0123456789ABCDEF";
    return new[] { '%', HexDigits[value >> 4], HexDigits[value & 0x0F] };
}
145
/// <summary>
/// Builds the character table used for query string and form parameter encoding.
/// </summary>
/// <returns>
/// The table created by <c>SafeList.Generate</c> for values up to 255 with the
/// percent-then-hex generator, after the URL parameter safe characters have been
/// passed to <c>SafeList.PunchSafeList</c>.
/// </returns>
private static char[][] InitialiseSafeList() {
    char[][] table = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);

    // NOTE(review): PunchSafeList presumably clears the entries of the safe characters so
    // they are written unchanged; its implementation is not visible here.
    IEnumerable safeCharacters = UrlParameterSafeList();
    SafeList.PunchSafeList(ref table, safeCharacters);

    return table;
}
154
/// <summary>
/// Enumerates the character codes that are safe in a URL parameter.
/// </summary>
/// <returns>
/// The codes, each yielded as an <see cref="int"/>, in this order: hyphen, full stop,
/// the digits, the upper case letters, underscore, the lower case letters and tilde.
/// </returns>
private static IEnumerable UrlParameterSafeList() {
    // The casts keep every yielded value an int rather than a char.
    yield return (int)'-';
    yield return (int)'.';

    for (int digit = '0'; digit <= '9'; digit++) {
        yield return digit;
    }

    for (int upper = 'A'; upper <= 'Z'; upper++) {
        yield return upper;
    }

    yield return (int)'_';

    for (int lower = 'a'; lower <= 'z'; lower++) {
        yield return lower;
    }

    yield return (int)'~';
}
187
188 #region UrlPathEncode Helpers
189
/// <summary>
/// Lazily built table, indexed by byte value, holding the text to output for each byte
/// when encoding URL paths.
/// </summary>
/// <remarks>
/// The field is readonly: the lazy wrapper is created once and never replaced.
/// </remarks>
private static readonly Lazy<char[][]> pathCharacterValuesLazy = new Lazy<char[][]>(InitialisePathSafeList);
194
/// <summary>
/// Encodes a string for use in a URL path and returns the encoded string.
/// </summary>
/// <param name="s">The text to encode.</param>
/// <param name="encoding">The encoding for the text parameter.</param>
/// <returns>The encoded text.</returns>
internal static string UrlPathEncode(string s, Encoding encoding) {
    // Paths reuse the query string handling of spaces together with the path character table.
    const EncodingType spaceHandling = EncodingType.QueryString;
    return FormQueryEncode(s, encoding, spaceHandling, pathCharacterValuesLazy);
}
198
/// <summary>
/// Builds the character table used for URL path encoding.
/// </summary>
/// <returns>
/// The table created by <c>SafeList.Generate</c> for values up to 255 with the
/// percent-then-hex generator, after the URL path safe characters have been
/// passed to <c>SafeList.PunchSafeList</c>.
/// </returns>
private static char[][] InitialisePathSafeList() {
    char[][] table = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);

    // NOTE(review): PunchSafeList presumably clears the entries of the safe characters so
    // they are written unchanged; its implementation is not visible here.
    IEnumerable safeCharacters = UrlPathSafeList();
    SafeList.PunchSafeList(ref table, safeCharacters);

    return table;
}
207
/// <summary>
/// Enumerates the character codes that are safe in a URL path.
/// </summary>
/// <returns>
/// Every URL parameter safe character, followed by hash, percent, forward slash,
/// backslash, left parenthesis and right parenthesis.
/// </returns>
private static IEnumerable UrlPathSafeList() {
    // Everything that is safe in a parameter is also safe in a path.
    foreach (object parameterSafe in UrlParameterSafeList()) {
        yield return parameterSafe;
    }

    // Path-only additions; the casts keep every yielded value an int rather than a char.
    yield return (int)'#';
    yield return (int)'%';
    yield return (int)'/';
    yield return (int)'\\';
    yield return (int)'(';
    yield return (int)')';
}
236
237 #endregion
238 }
239}