PageRenderTime 64ms CodeModel.GetById 14ms app.highlight 35ms RepoModel.GetById 1ms app.codeStats 0ms

/mcs/class/referencesource/System.Web/Security/AntiXss/HtmlParameterEncoder.cs

https://github.com/pruiz/mono
C# | 239 lines | 106 code | 42 blank | 91 comment | 20 complexity | ef185bd209d487409dded2627f14f8c0 MD5 | raw file
Possible License(s): LGPL-2.0, MPL-2.0-no-copyleft-exception, CC-BY-SA-3.0, GPL-2.0
  1//------------------------------------------------------------------------------
  2// <copyright file="HtmlParameterEncoder.cs" company="Microsoft">
  3//     Copyright (c) Microsoft Corporation.  All rights reserved.
  4// </copyright>
  5//------------------------------------------------------------------------------
  6
  7namespace System.Web.Security.AntiXss {
  8    using System;
  9    using System.Collections;
 10    using System.Text;
 11    using System.Threading;
 12
 13    /// <summary>
 14    /// The type of space encoding to use.
 15    /// </summary>
 16    internal enum EncodingType {
 17        /// <summary>
 18        /// Encode spaces for use in query strings
 19        /// </summary>
 20        QueryString = 1,
 21
 22        /// <summary>
 23        /// Encode spaces for use in form data
 24        /// </summary>
 25        HtmlForm = 2
 26    }
 27
 28    /// <summary>
 29    /// Provides Html Parameter Encoding methods.
 30    /// </summary>
 31    internal static class HtmlParameterEncoder {
 32
 33        /// <summary>
 34        /// The value to use when encoding a space for query strings.
 35        /// </summary>
 36        private static readonly char[] QueryStringSpace = "%20".ToCharArray();
 37
 38        /// <summary>
 39        /// The value to use when encoding a space for form data.
 40        /// </summary>
 41        private static readonly char[] FormStringSpace = "+".ToCharArray();
 42
 43        /// <summary>
 44        /// The values to output for each character.
 45        /// </summary>
 46        private static Lazy<char[][]> characterValuesLazy = new Lazy<char[][]>(InitialiseSafeList);
 47
 48        /// <summary>
 49        /// Encodes a string for query string encoding and returns the encoded string.
 50        /// </summary>
 51        /// <param name="s">The text to URL-encode.</param>
 52        /// <param name="encoding">The encoding for the text parameter.</param>
 53        /// <returns>The URL-encoded text.</returns>
 54        /// <remarks>URL encoding ensures that all browsers will correctly transmit text in URL strings. 
 55        /// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers. 
 56        /// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser 
 57        /// in a request string.</remarks>
 58        /// <exception cref="ArgumentNullException">Thrown if the encoding is null.</exception>
 59        internal static string QueryStringParameterEncode(string s, Encoding encoding) {
 60            return FormQueryEncode(s, encoding, EncodingType.QueryString);
 61        }
 62
 63        /// <summary>
 64        /// Encodes a string for form URL encoding and returns the encoded string.
 65        /// </summary>
 66        /// <param name="s">The text to URL-encode.</param>
 67        /// <param name="encoding">The encoding for the text parameter.</param>
 68        /// <returns>The URL-encoded text.</returns>
 69        /// <remarks>URL encoding ensures that all browsers will correctly transmit text in URL strings. 
 70        /// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers. 
 71        /// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser 
 72        /// in a request string.</remarks>
 73        /// <exception cref="ArgumentNullException">Thrown if the encoding is null.</exception>
 74        internal static string FormStringParameterEncode(string s, Encoding encoding) {
 75            return FormQueryEncode(s, encoding, EncodingType.HtmlForm);
 76        }
 77
 78        /// <summary>
 79        /// Encodes a string for Query String or Form Data encoding.
 80        /// </summary>
 81        /// <param name="s">The text to URL-encode.</param>
 82        /// <param name="encoding">The encoding for the text parameter.</param>
 83        /// <param name="encodingType">The encoding type to use.</param>
 84        /// <returns>The encoded text.</returns>
 85        private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType) {
 86            return FormQueryEncode(s, encoding, encodingType, characterValuesLazy);
 87        }
 88
 89        private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType, Lazy<char[][]> characterValuesLazy) {
 90            if (string.IsNullOrEmpty(s)) {
 91                return s;
 92            }
 93
 94            if (encoding == null) {
 95                throw new ArgumentNullException("encoding");
 96            }
 97
 98            var characterValues = characterValuesLazy.Value;
 99
100            // RFC 3986 states strings must be converted to their UTF8 value before URL encoding.
101            // See http://tools.ietf.org/html/rfc3986
102            // Conversion to char[] keeps null characters inline.
103            byte[] utf8Bytes = encoding.GetBytes(s.ToCharArray());
104            char[] encodedInput = new char[utf8Bytes.Length * 3]; // Each byte can potentially be encoded as %xx
105            int outputLength = 0;
106
107            for (int characterPosition = 0; characterPosition < utf8Bytes.Length; characterPosition++) {
108                byte currentCharacter = utf8Bytes[characterPosition];
109
110                if (currentCharacter == 0x00 || currentCharacter == 0x20 || currentCharacter > characterValues.Length || characterValues[currentCharacter] != null) {
111                    // character needs to be encoded
112                    char[] encodedCharacter;
113
114                    if (currentCharacter == 0x20) {
115                        switch (encodingType) {
116                            case EncodingType.QueryString:
117                                encodedCharacter = QueryStringSpace;
118                                break;
119
120                            // Special case for Html Form data, from http://www.w3.org/TR/html401/appendix/notes.html#non-ascii-chars
121                            case EncodingType.HtmlForm:
122                                encodedCharacter = FormStringSpace;
123                                break;
124
125                            default:
126                                throw new ArgumentOutOfRangeException("encodingType");
127                        }
128                    }
129                    else {
130                        encodedCharacter = characterValues[currentCharacter];
131                    }
132
133                    for (int j = 0; j < encodedCharacter.Length; j++) {
134                        encodedInput[outputLength++] = encodedCharacter[j];
135                    }
136                }
137                else {
138                    // character does not need encoding
139                    encodedInput[outputLength++] = (char)currentCharacter;
140                }
141            }
142
143            return new string(encodedInput, 0, outputLength);
144        }
145
146        /// <summary>
147        /// Initializes the HTML safe list.
148        /// </summary>
149        private static char[][] InitialiseSafeList() {
150            char[][] result = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);
151            SafeList.PunchSafeList(ref result, UrlParameterSafeList());
152            return result;
153        }
154
155        /// <summary>
156        /// Provides the safe characters for URL parameter encoding.
157        /// </summary>
158        /// <returns>The safe characters for URL parameter encoding.</returns>
159        private static IEnumerable UrlParameterSafeList() {
160            // Hyphen
161            yield return 0x2D;
162
163            // Full stop/period
164            yield return 0x2E;
165
166            // Digits
167            for (int i = 0x30; i <= 0x39; i++) {
168                yield return i;
169            }
170
171            // Upper case alphabet
172            for (int i = 0x41; i <= 0x5A; i++) {
173                yield return i;
174            }
175
176            // Underscore
177            yield return 0x5F;
178
179            // Lower case alphabet
180            for (int i = 0x61; i <= 0x7A; i++) {
181                yield return i;
182            }
183
184            // Tilde
185            yield return 0x7E;
186        }
187
188        #region UrlPathEncode Helpers
189
190        /// <summary>
191        /// The values to output for each character.
192        /// </summary>
193        private static Lazy<char[][]> pathCharacterValuesLazy = new Lazy<char[][]>(InitialisePathSafeList);
194
195        internal static string UrlPathEncode(string s, Encoding encoding) {
196            return FormQueryEncode(s, encoding, EncodingType.QueryString, pathCharacterValuesLazy);
197        }
198
199        /// <summary>
200        /// Initializes the HTML safe list.
201        /// </summary>
202        private static char[][] InitialisePathSafeList() {
203            char[][] result = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);
204            SafeList.PunchSafeList(ref result, UrlPathSafeList());
205            return result;
206        }
207
208        /// <summary>
209        /// Provides the safe characters for URL path encoding.
210        /// </summary>
211        /// <returns>The safe characters for URL path encoding.</returns>
212        private static IEnumerable UrlPathSafeList() {
213
214            foreach (var c in UrlParameterSafeList()) {
215                yield return c;
216            }
217
218            // Hash
219            yield return 0x23;
220
221            // Percent
222            yield return 0x25;
223
224            // Forward slash
225            yield return 0x2F;
226
227            // Backwards slash
228            yield return 0x5C;
229
230            // Left parenthesis
231            yield return 0x28;
232
233            //Right parenthesis
234            yield return 0x29;
235        }
236
237        #endregion
238    }
239}