PageRenderTime 174ms CodeModel.GetById 17ms app.highlight 148ms RepoModel.GetById 1ms app.codeStats 1ms

/src/support/z_string.erl

https://code.google.com/p/zotonic/
Erlang | 856 lines | 675 code | 88 blank | 93 comment | 21 complexity | a31d95872c65a2562832bd04ff89e3c8 MD5 | raw file
  1%% @author Marc Worrell <marc@worrell.nl>
  2%% @copyright 2009-2010 Marc Worrell
  3%% Date: 2009-04-26
  4%% @doc String related functions
  5%% @todo Make this UTF-8 safe
  6
  7%% @todo Check valid chars for filenames, allow chinese, japanese, etc?
  8%% CJK Unified Ideographs Extension A: Range: 3400-4DBF
  9%% CJK Unified Ideographs: Range: 4E00-9FAF
 10%% Kangxi Radicals: Range 2F00-2FDF
 11%% See also: http://www.utf8-chartable.de/
 12
 13%% Copyright 2009-2010 Marc Worrell
 14%%
 15%% Licensed under the Apache License, Version 2.0 (the "License");
 16%% you may not use this file except in compliance with the License.
 17%% You may obtain a copy of the License at
 18%% 
 19%%     http://www.apache.org/licenses/LICENSE-2.0
 20%% 
 21%% Unless required by applicable law or agreed to in writing, software
 22%% distributed under the License is distributed on an "AS IS" BASIS,
 23%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 24%% See the License for the specific language governing permissions and
 25%% limitations under the License.
 26
 27-module(z_string).
 28-author("Marc Worrell <marc@worrell.nl").
 29
 30%% interface functions
 31-export([
 32    trim/1,
 33    trim_left/1,
 34    trim_right/1,
 35    trim/2,
 36    trim_left/2,
 37    trim_right/2,
 38    trim_left_func/2,
 39    is_string/1,
 40    first_char/1,
 41    last_char/1,
 42    unquote/1,
 43    unquote/2,
 44    nospaces/1,
 45    line/1,
 46    to_rootname/1,
 47    to_name/1,
 48    to_slug/1,
 49    to_lower/1,
 50    to_upper/1,
 51    replace/3,
 52    truncate/2,
 53    truncate/3,
 54    truncatewords/2,
 55    truncatewords/3,
 56    split_lines/1,
 57    escape_ical/1,
 58    starts_with/2,
 59    ends_with/2,
 60    contains/2,
 61    split/2,
 62    test/0
 63]).
 64
 65-include_lib("include/zotonic.hrl").
 66
 67
 68%% @doc Remove whitespace at the start and end of the string
 69trim(B) when is_binary(B) ->
 70	trim_right(trim_left(B));
 71trim(L) when is_list(L) ->
 72	binary_to_list(trim(iolist_to_binary(L))).
 73
 74%% @doc Remove all occurences of a character at the start and end of a string.
 75trim(B, Char) when is_binary(B) ->
 76	trim_right(trim_left(B, Char), Char);
 77trim(L, Char) when is_list(L) ->
 78	binary_to_list(trim(iolist_to_binary(L), Char)).
 79
 80
 81%% @doc Remove whitespace at the start the string
 82trim_left(S) ->
 83    trim_left_func(S, fun(C) -> C =< 32 end).
 84
 85%% @doc Remove all occurences of a char at the start of a string
 86trim_left(S, Char) ->
 87    trim_left_func(S, fun(C) -> C == Char end).
 88
 89
 90trim_left_func(<<Char, Rest/binary>> = Bin, F) ->
 91    case F(Char) of
 92        true -> trim_left_func(Rest, F);
 93        false -> Bin
 94    end;
 95trim_left_func([Char|Rest] = L, F) when is_integer(Char) ->
 96    case F(Char) of
 97        true -> trim_left(Rest, F);
 98        false -> L
 99    end;
100trim_left_func([L|Rest], F) when is_list(L); is_binary(L) ->
101    case trim_left_func(L, F) of
102        [] -> trim_left_func(Rest, F);
103        <<>> -> trim_left_func(Rest, F);
104        Other -> [Other|Rest]
105    end;
106trim_left_func(Other, _F) ->
107    Other.
108
109    
110	
%% @doc Remove whitespace at the end of the string. Every byte with a value
%% of 32 or lower counts as whitespace. A binary yields a binary; any other
%% argument is converted with iolist_to_binary/1 and the result is returned
%% as a list.
trim_right(B) when is_binary(B) ->
    trim_right(B, <<>>, <<>>);
trim_right(L) ->
    Bin = iolist_to_binary(L),
    binary_to_list(trim_right(Bin)).

%% Single pass over the binary. Pending collects the current run of
%% whitespace; it is only copied to Kept once a non-whitespace byte follows,
%% so a run that reaches the end of the input is dropped.
trim_right(<<>>, _Pending, Kept) ->
    Kept;
trim_right(<<C, Rest/binary>>, Pending, Kept) when C =< 32 ->
    trim_right(Rest, <<Pending/binary, C>>, Kept);
trim_right(<<C, Rest/binary>>, Pending, Kept) ->
    trim_right(Rest, <<>>, <<Kept/binary, Pending/binary, C>>).
124
%% @doc Remove all occurrences of Char at the end of the string. A binary
%% yields a binary; any other argument is converted with iolist_to_binary/1
%% and the result is returned as a list.
trim_right(B, Char) when is_binary(B) ->
    trim_right(B, Char, <<>>, <<>>);
trim_right(L, Char) ->
    Bin = iolist_to_binary(L),
    binary_to_list(trim_right(Bin, Char)).

%% Single pass over the binary. Pending collects the current run of Char
%% bytes; it is only copied to Kept once a different byte follows, so a run
%% that reaches the end of the input is dropped.
trim_right(<<>>, _Char, _Pending, Kept) ->
    Kept;
trim_right(<<Char, Rest/binary>>, Char, Pending, Kept) ->
    trim_right(Rest, Char, <<Pending/binary, Char>>, Kept);
trim_right(<<C, Rest/binary>>, Char, Pending, Kept) ->
    trim_right(Rest, Char, <<>>, <<Kept/binary, Pending/binary, C>>).
138
%% @doc Check if the term is a flat list of printable byte values, which
%% makes it probably a string. Accepted elements are integers 32..255 plus
%% tab (9), line feed (10), form feed (12) and carriage return (13).
%% Anything else, including nested lists and binaries, yields false.
is_string([]) ->
    true;
is_string([C|Rest]) when is_integer(C), C >= 32, C =< 255;
                         C =:= 9; C =:= 10; C =:= 12; C =:= 13 ->
    is_string(Rest);
is_string(_) ->
    false.
149
150
%% @doc Return the first character of a string, or undefined when the string
%% is empty. For a binary the first byte is returned as an integer. For a
%% list the result is whatever truncate/3 returns for a length of one, which
%% is a list rather than an integer.
%% @todo Make this UTF-8 safe
first_char(<<>>) -> undefined;
first_char(<<C, _/binary>>) -> C;
first_char([]) -> undefined;
first_char([H|_] = L) when is_integer(H) ->
    truncate(L, 1, "").
158
159
%% @doc Return the last character of a string, or undefined when the string
%% is empty. For a binary this is the last byte, for a list the last element.
last_char(<<>>) ->
    undefined;
last_char(B) when is_binary(B) ->
    Skip = byte_size(B) - 1,
    <<_:Skip/binary, C>> = B,
    C;
last_char([]) ->
    undefined;
last_char([C]) ->
    C;
last_char([_|Rest]) ->
    last_char(Rest).
167
168
%% @doc Remove the first and last char if they are double quotes.
unquote(S) ->
    unquote(S, $").

%% @doc Remove the first and last char if both are the quote character Q.
%% The input is returned untouched when it does not start with Q or when
%% there is no closing Q at the very end. Works on lists and binaries.
unquote(<<Q, Rest/binary>> = Orig, Q) ->
    unquote1(Rest, <<>>, Q, Orig);
unquote([Q|Rest] = Orig, Q) ->
    unquote1(Rest, [], Q, Orig);
unquote(Other, _Q) ->
    Other.

%% Walk to the end of the input while collecting everything seen so far.
%% Only when the very last character is Q is the collected text returned;
%% running out of input without a closing quote gives back the original.
unquote1([], _Seen, _Q, Orig) -> Orig;
unquote1([Q], Seen, Q, _Orig) -> lists:reverse(Seen);
unquote1([C|Rest], Seen, Q, Orig) -> unquote1(Rest, [C|Seen], Q, Orig);

unquote1(<<>>, _Seen, _Q, Orig) -> Orig;
unquote1(<<Q>>, Seen, Q, _Orig) -> Seen;
unquote1(<<C, Rest/binary>>, Seen, Q, Orig) -> unquote1(Rest, <<Seen/binary, C>>, Q, Orig).
187
188
%% @doc Remove all spaces and control characters (every character with a
%% code of 32 or lower) from a string. A binary is converted to a list first;
%% the result is always a list.
nospaces(B) when is_binary(B) ->
    nospaces(binary_to_list(B));
nospaces(L) ->
    nospaces(L, []).

%% Kept holds the surviving characters in reverse order.
nospaces([], Kept) ->
    lists:reverse(Kept);
nospaces([C|Tail], Kept) ->
    Kept1 = case C =< 32 of
                true -> Kept;
                false -> [C|Kept]
            end,
    nospaces(Tail, Kept1).
201
202
203
%% @doc Make sure that the string is on one line only: every control
%% character (code below 32) is replaced with a space. A binary is converted
%% to a list first; the result is always a list of the same length.
line(B) when is_binary(B) ->
    line(binary_to_list(B));
line(L) ->
    line1(L, []).

%% Out holds the converted characters in reverse order.
line1([], Out) ->
    lists:reverse(Out);
line1([H|T], Out) ->
    Ch = if
             H < 32 -> 32;
             true -> H
         end,
    line1(T, [Ch|Out]).
216
217
%% @doc Return a lowercase string for the input. Binaries and atoms are
%% converted to a list first, deep lists are flattened; the result is always
%% a flat list.
%%
%% ASCII A-Z is lowercased, together with a fixed table of accented Latin,
%% Cyrillic, Ukrainian, Polish and Turkish capitals. The input is treated as
%% a list of UTF-8 bytes: multi-byte letters are matched on their byte
%% sequence and replaced by the byte sequence of the lowercase letter.
%% Everything that is not in the table is copied unchanged.
%% @spec to_lower(Value) -> String
to_lower(B) when is_binary(B) ->
    to_lower(binary_to_list(B));
to_lower(A) when is_atom(A) ->
    to_lower(atom_to_list(A));
to_lower(L) when is_list(L) ->
    to_lower(lists:flatten(L), []).

%% The accumulator is built in reverse, so replacement bytes are pushed
%% last-byte-first. Patterns are spelled out as UTF-8 byte values instead of
%% string literals so that they do not depend on the encoding of this file.
to_lower([], Acc) -> lists:reverse(Acc);
to_lower([H|T], Acc) when H >= $A andalso H =< $Z -> to_lower(T, [H+32|Acc]);
%% Accented Latin capitals (lead byte 195): the small letter is 32 further on
to_lower([195,133|T], Acc) -> to_lower(T, [165,195|Acc]); % U+00C5 A ring
to_lower([195,132|T], Acc) -> to_lower(T, [164,195|Acc]); % U+00C4 A diaeresis
to_lower([195,129|T], Acc) -> to_lower(T, [161,195|Acc]); % U+00C1 A acute
to_lower([195,128|T], Acc) -> to_lower(T, [160,195|Acc]); % U+00C0 A grave
to_lower([195,139|T], Acc) -> to_lower(T, [171,195|Acc]); % U+00CB E diaeresis
to_lower([195,138|T], Acc) -> to_lower(T, [170,195|Acc]); % U+00CA E circumflex
to_lower([195,137|T], Acc) -> to_lower(T, [169,195|Acc]); % U+00C9 E acute
to_lower([195,136|T], Acc) -> to_lower(T, [168,195|Acc]); % U+00C8 E grave
to_lower([195,143|T], Acc) -> to_lower(T, [175,195|Acc]); % U+00CF I diaeresis
to_lower([195,142|T], Acc) -> to_lower(T, [174,195|Acc]); % U+00CE I circumflex
to_lower([195,141|T], Acc) -> to_lower(T, [173,195|Acc]); % U+00CD I acute
to_lower([195,140|T], Acc) -> to_lower(T, [172,195|Acc]); % U+00CC I grave
to_lower([195,156|T], Acc) -> to_lower(T, [188,195|Acc]); % U+00DC U diaeresis
to_lower([195,155|T], Acc) -> to_lower(T, [187,195|Acc]); % U+00DB U circumflex
to_lower([195,154|T], Acc) -> to_lower(T, [186,195|Acc]); % U+00DA U acute
to_lower([195,153|T], Acc) -> to_lower(T, [185,195|Acc]); % U+00D9 U grave
to_lower([195,150|T], Acc) -> to_lower(T, [182,195|Acc]); % U+00D6 O diaeresis
to_lower([195,148|T], Acc) -> to_lower(T, [180,195|Acc]); % U+00D4 O circumflex
to_lower([195,147|T], Acc) -> to_lower(T, [179,195|Acc]); % U+00D3 O acute
to_lower([195,146|T], Acc) -> to_lower(T, [178,195|Acc]); % U+00D2 O grave
to_lower([195,152|T], Acc) -> to_lower(T, [184,195|Acc]); % U+00D8 O stroke
to_lower([195,135|T], Acc) -> to_lower(T, [167,195|Acc]); % U+00C7 C cedilla
to_lower([195,134|T], Acc) -> to_lower(T, [166,195|Acc]); % U+00C6 AE
to_lower([197,146|T], Acc) -> to_lower(T, [147,197|Acc]); % U+0152 OE
%% Cyrillic support
%% U+0410..U+041F -> U+0430..U+043F (lead byte stays 208)
to_lower([208,C|T], Acc) when C >= 144, C =< 159 -> to_lower(T, [C+32,208|Acc]);
%% U+0420..U+042F -> U+0440..U+044F (lead byte becomes 209)
to_lower([208,C|T], Acc) when C >= 160, C =< 175 -> to_lower(T, [C-32,209|Acc]);
to_lower([208,129|T], Acc) -> to_lower(T, [145,209|Acc]); % U+0401 IO
%% Extra Ukrainian characters
to_lower([210,144|T], Acc) -> to_lower(T, [145,210|Acc]); % U+0490 GHE with upturn
to_lower([208,135|T], Acc) -> to_lower(T, [151,209|Acc]); % U+0407 YI
to_lower([208,134|T], Acc) -> to_lower(T, [150,209|Acc]); % U+0406 I
to_lower([208,132|T], Acc) -> to_lower(T, [148,209|Acc]); % U+0404 IE
%% Polish support
to_lower([196,132|T], Acc) -> to_lower(T, [133,196|Acc]); % U+0104 A ogonek
to_lower([196,152|T], Acc) -> to_lower(T, [153,196|Acc]); % U+0118 E ogonek
to_lower([196,134|T], Acc) -> to_lower(T, [135,196|Acc]); % U+0106 C acute
to_lower([197,129|T], Acc) -> to_lower(T, [130,197|Acc]); % U+0141 L stroke
to_lower([197,131|T], Acc) -> to_lower(T, [132,197|Acc]); % U+0143 N acute
to_lower([197,154|T], Acc) -> to_lower(T, [155,197|Acc]); % U+015A S acute
to_lower([197,185|T], Acc) -> to_lower(T, [186,197|Acc]); % U+0179 Z acute
to_lower([197,187|T], Acc) -> to_lower(T, [188,197|Acc]); % U+017B Z dot above
%% Turkish support
to_lower([197,158|T], Acc) -> to_lower(T, [159,197|Acc]); % U+015E S cedilla
to_lower([196,158|T], Acc) -> to_lower(T, [159,196|Acc]); % U+011E G breve
%% NOTE(review): the input side of this pair is taken from the inverse of
%% the to_upper/1 table (U+0130 dotted capital I -> U+0131 dotless i).
to_lower([196,176|T], Acc) -> to_lower(T, [177,196|Acc]);
%% Other characters are taken as-is
to_lower([H|T], Acc) -> to_lower(T, [H|Acc]).
307
308
%% @doc Return an uppercase string for the input. Binaries and atoms are
%% converted to a list first, deep lists are flattened; the result is always
%% a flat list.
%%
%% ASCII a-z is uppercased, together with a fixed table of accented Latin,
%% Cyrillic, Ukrainian, Polish and Turkish small letters. The input is
%% treated as a list of UTF-8 bytes: multi-byte letters are matched on their
%% byte sequence and replaced by the byte sequence of the capital letter.
%% Everything that is not in the table is copied unchanged.
%% @spec to_upper(Value) -> String
to_upper(B) when is_binary(B) ->
    to_upper(binary_to_list(B));
to_upper(A) when is_atom(A) ->
    to_upper(atom_to_list(A));
to_upper(L) when is_list(L) ->
    to_upper(lists:flatten(L), []).

%% The accumulator is built in reverse, so replacement bytes are pushed
%% last-byte-first. Patterns are spelled out as UTF-8 byte values instead of
%% string literals so that they do not depend on the encoding of this file.
to_upper([], Acc) -> lists:reverse(Acc);
to_upper([H|T], Acc) when H >= $a andalso H =< $z -> to_upper(T, [H-32|Acc]);
%% Accented Latin small letters (lead byte 195): the capital is 32 back
to_upper([195,165|T], Acc) -> to_upper(T, [133,195|Acc]); % U+00E5 a ring
to_upper([195,164|T], Acc) -> to_upper(T, [132,195|Acc]); % U+00E4 a diaeresis
to_upper([195,161|T], Acc) -> to_upper(T, [129,195|Acc]); % U+00E1 a acute
to_upper([195,160|T], Acc) -> to_upper(T, [128,195|Acc]); % U+00E0 a grave
to_upper([195,171|T], Acc) -> to_upper(T, [139,195|Acc]); % U+00EB e diaeresis
to_upper([195,170|T], Acc) -> to_upper(T, [138,195|Acc]); % U+00EA e circumflex
to_upper([195,169|T], Acc) -> to_upper(T, [137,195|Acc]); % U+00E9 e acute
to_upper([195,168|T], Acc) -> to_upper(T, [136,195|Acc]); % U+00E8 e grave
to_upper([195,175|T], Acc) -> to_upper(T, [143,195|Acc]); % U+00EF i diaeresis
%% i circumflex: the pattern is the small letter U+00EE; matching the capital
%% here would leave the small letter unconverted.
to_upper([195,174|T], Acc) -> to_upper(T, [142,195|Acc]);
to_upper([195,173|T], Acc) -> to_upper(T, [141,195|Acc]); % U+00ED i acute
to_upper([195,172|T], Acc) -> to_upper(T, [140,195|Acc]); % U+00EC i grave
to_upper([195,188|T], Acc) -> to_upper(T, [156,195|Acc]); % U+00FC u diaeresis
to_upper([195,187|T], Acc) -> to_upper(T, [155,195|Acc]); % U+00FB u circumflex
to_upper([195,186|T], Acc) -> to_upper(T, [154,195|Acc]); % U+00FA u acute
to_upper([195,185|T], Acc) -> to_upper(T, [153,195|Acc]); % U+00F9 u grave
to_upper([195,182|T], Acc) -> to_upper(T, [150,195|Acc]); % U+00F6 o diaeresis
to_upper([195,180|T], Acc) -> to_upper(T, [148,195|Acc]); % U+00F4 o circumflex
to_upper([195,179|T], Acc) -> to_upper(T, [147,195|Acc]); % U+00F3 o acute
to_upper([195,178|T], Acc) -> to_upper(T, [146,195|Acc]); % U+00F2 o grave
to_upper([195,184|T], Acc) -> to_upper(T, [152,195|Acc]); % U+00F8 o stroke
to_upper([195,167|T], Acc) -> to_upper(T, [135,195|Acc]); % U+00E7 c cedilla
to_upper([195,166|T], Acc) -> to_upper(T, [134,195|Acc]); % U+00E6 ae
to_upper([197,147|T], Acc) -> to_upper(T, [146,197|Acc]); % U+0153 oe
%% Cyrillic support
%% U+0430..U+043F -> U+0410..U+041F (lead byte stays 208)
to_upper([208,C|T], Acc) when C >= 176, C =< 191 -> to_upper(T, [C-32,208|Acc]);
%% U+0440..U+044F -> U+0420..U+042F (lead byte becomes 208)
to_upper([209,C|T], Acc) when C >= 128, C =< 143 -> to_upper(T, [C+32,208|Acc]);
to_upper([209,145|T], Acc) -> to_upper(T, [129,208|Acc]); % U+0451 io
%% Extra Ukrainian characters
to_upper([210,145|T], Acc) -> to_upper(T, [144,210|Acc]); % U+0491 ghe with upturn
to_upper([209,151|T], Acc) -> to_upper(T, [135,208|Acc]); % U+0457 yi
%% U+0456 i -> U+0406, which is bytes 208,134 (208,143 would be U+040F)
to_upper([209,150|T], Acc) -> to_upper(T, [134,208|Acc]);
to_upper([209,148|T], Acc) -> to_upper(T, [132,208|Acc]); % U+0454 ie
%% Polish support
to_upper([196,133|T], Acc) -> to_upper(T, [132,196|Acc]); % U+0105 a ogonek
to_upper([196,153|T], Acc) -> to_upper(T, [152,196|Acc]); % U+0119 e ogonek
to_upper([196,135|T], Acc) -> to_upper(T, [134,196|Acc]); % U+0107 c acute
to_upper([197,130|T], Acc) -> to_upper(T, [129,197|Acc]); % U+0142 l stroke
to_upper([197,132|T], Acc) -> to_upper(T, [131,197|Acc]); % U+0144 n acute
to_upper([197,155|T], Acc) -> to_upper(T, [154,197|Acc]); % U+015B s acute
to_upper([197,186|T], Acc) -> to_upper(T, [185,197|Acc]); % U+017A z acute
to_upper([197,188|T], Acc) -> to_upper(T, [187,197|Acc]); % U+017C z dot above
%% Turkish support
to_upper([197,159|T], Acc) -> to_upper(T, [158,197|Acc]); % U+015F s cedilla
to_upper([196,159|T], Acc) -> to_upper(T, [158,196|Acc]); % U+011F g breve
%% NOTE(review): the input side of this pair is taken from the inverse of
%% the to_lower/1 table (U+0131 dotless i -> U+0130 dotted capital I).
to_upper([196,177|T], Acc) -> to_upper(T, [176,196|Acc]);
%% Other chars are taken as-is
to_upper([H|T], Acc) -> to_upper(T, [H|Acc]).
399
%% @doc Filter a filename so that we obtain a basename that is safe to use.
%% The directory part and the extension are removed and what remains is
%% turned into a slug with to_slug/1.
%% @spec to_rootname(string()) -> string()
to_rootname(Filename) ->
    Base = filename:basename(Filename),
    Root = filename:rootname(Base),
    to_slug(Root).
404
405
%% @doc Map a string to a slug that can be used in the uri of a page. This is
%% the result of to_name/1 with every underscore replaced by a dash.
%% @spec to_slug(String) -> String
to_slug(Title) ->
    Dashify = fun($_) -> $-;
                 (C) -> C
              end,
    lists:map(Dashify, to_name(Title)).
411
412
%% @doc Map a string to a value that can be used as a name or slug. ASCII
%% letters are lowercased, digits and underscores are kept, a fixed table of
%% accented Latin, Cyrillic, Ukrainian, Polish and Turkish letters is
%% transliterated to ASCII, and every other run of characters collapses into
%% a single underscore. Leading and trailing underscores are stripped; an
%% empty result becomes "_". Scanning stops once the length counter reaches
%% 80.
%%
%% Accepts a list (treated as UTF-8 bytes), a binary, an atom, or a
%% {trans, [{Lang, Text}]} tuple, for which the `en' text is preferred and
%% the first listed text is the fallback.
%% @spec to_name(String) -> String
to_name({trans, Tr}) ->
    case proplists:get_value(en, Tr) of
        undefined ->
            case Tr of
                [{_,V}|_] -> to_name(V);
                _ -> to_name([])
            end;
        V -> to_name(V)
    end;
to_name(Name) when is_binary(Name) ->
    to_name(binary_to_list(Name));
to_name(Name) when is_atom(Name) ->
    to_name(atom_to_list(Name));
to_name(Name) ->
    to_name(Name, [], 0).

%% Acc is the result in reverse order; I is the length counter.
to_name([], Acc, _I) ->
    case string:strip(lists:reverse(Acc), both, $_) of
        [] -> "_";
        Name -> Name
    end;
to_name(_, Acc, N) when N >= 80 ->
    to_name([], Acc, 80);
to_name([C|T], Acc, I) when C >= $A andalso C =< $Z ->
    to_name(T, [C+32|Acc], I+1);
to_name([C|T], Acc, I) when (C >= $a andalso C =< $z) orelse (C >= $0 andalso C =< $9) orelse C =:= $_ ->
    to_name(T, [C|Acc], I+1);
to_name([_|T] = S, Acc, I) ->
    case to_name_translit(S) of
        {Repl, Rest} ->
            %% Table hits are always emitted in full and count for as many
            %% positions as they produce characters.
            to_name(Rest, lists:reverse(Repl, Acc), I + length(Repl));
        none ->
            %% Anything else becomes "_", but never two in a row.
            case Acc of
                [$_|_] -> to_name(T, Acc, I+1);
                _ -> to_name(T, [$_|Acc], I+1)
            end
    end.

%% Transliteration table. Returns {Replacement, Rest} when the input starts
%% with a known sequence and `none' otherwise. Multi-byte letters are matched
%% on their UTF-8 byte values instead of string literals so that the table
%% does not depend on the encoding of this file.
%% Accented Latin letters (lead byte 195), capital and small forms
to_name_translit([195,C|T]) when C =:= 164; C =:= 132; C =:= 165; C =:= 133 ->
    {"a", T};
to_name_translit([195,C|T]) when C =:= 171; C =:= 139; C =:= 169; C =:= 168;
                                 C =:= 137; C =:= 136 ->
    {"e", T};
to_name_translit([195,C|T]) when C =:= 175; C =:= 143; C =:= 173; C =:= 172;
                                 C =:= 141; C =:= 140 ->
    {"i", T};
to_name_translit([195,C|T]) when C =:= 188; C =:= 156; C =:= 186; C =:= 185;
                                 C =:= 154; C =:= 153 ->
    {"u", T};
to_name_translit([195,C|T]) when C =:= 182; C =:= 150; C =:= 179; C =:= 178;
                                 C =:= 147; C =:= 146; C =:= 184; C =:= 152 ->
    {"o", T};
to_name_translit([195,C|T]) when C =:= 167; C =:= 135 ->
    {"c", T};
to_name_translit([195,159|T]) -> {"ss", T};     % U+00DF sharp s
%% NOTE(review): U+00FF has always come out as "ji"; "ij" may have been
%% intended. Kept as it was - confirm before changing.
to_name_translit([195,191|T]) -> {"ji", T};
to_name_translit([226,130,172|T]) -> {"e", T};  % U+20AC euro sign
to_name_translit([$@|T]) -> {"_at_", T};
%% Cyrillic support (from http://en.wikipedia.org/wiki/Romanization_of_Russian)
to_name_translit([208,C|T]) when C >= 144, C =< 175 ->   % U+0410..U+042F
    {to_name_cyrillic(C - 143), T};
to_name_translit([208,C|T]) when C >= 176, C =< 191 ->   % U+0430..U+043F
    {to_name_cyrillic(C - 175), T};
to_name_translit([209,C|T]) when C >= 128, C =< 143 ->   % U+0440..U+044F
    {to_name_cyrillic(C - 111), T};
to_name_translit([208,129|T]) -> {"yo", T};     % U+0401 IO
to_name_translit([209,145|T]) -> {"yo", T};     % U+0451 io
%% Ukrainian support
to_name_translit([210,C|T]) when C =:= 144; C =:= 145 -> {"g", T};  % U+0490/0491
to_name_translit([208,134|T]) -> {"i", T};      % U+0406
to_name_translit([209,150|T]) -> {"i", T};      % U+0456
to_name_translit([208,135|T]) -> {"i", T};      % U+0407
to_name_translit([209,151|T]) -> {"i", T};      % U+0457
to_name_translit([208,132|T]) -> {"ye", T};     % U+0404
to_name_translit([209,148|T]) -> {"ye", T};     % U+0454
%% Polish support
to_name_translit([196,C|T]) when C =:= 132; C =:= 133 -> {"a", T};  % A ogonek
to_name_translit([196,C|T]) when C =:= 152; C =:= 153 -> {"e", T};  % E ogonek
to_name_translit([196,C|T]) when C =:= 134; C =:= 135 -> {"c", T};  % C acute
to_name_translit([197,C|T]) when C =:= 129; C =:= 130 -> {"l", T};  % L stroke
to_name_translit([197,C|T]) when C =:= 131; C =:= 132 -> {"n", T};  % N acute
to_name_translit([197,C|T]) when C =:= 154; C =:= 155 -> {"s", T};  % S acute
to_name_translit([197,C|T]) when C =:= 185; C =:= 186 -> {"z", T};  % Z acute
to_name_translit([197,C|T]) when C =:= 187; C =:= 188 -> {"z", T};  % Z dot above
%% Turkish support
to_name_translit([197,C|T]) when C =:= 158; C =:= 159 -> {"s", T};  % S cedilla
to_name_translit([196,C|T]) when C =:= 158; C =:= 159 -> {"g", T};  % G breve
to_name_translit([196,C|T]) when C =:= 176; C =:= 177 -> {"i", T};  % dotted I / dotless i
%% Some entities - we might want to add generic code here, depends
%% on where to_name/1 is used (can we assume that the input is always html?)
to_name_translit("&amp;"++T) -> {"_", T};
to_name_translit("&lt;"++T) -> {"_", T};
to_name_translit("&gt;"++T) -> {"_", T};
to_name_translit("&#39;"++T) -> {"_", T};
to_name_translit(_) -> none.

%% Romanization of the 32 consecutive Cyrillic letters U+0410..U+042F
%% (the small letters share the table); N is the 1-based position.
to_name_cyrillic(N) ->
    element(N, {"a", "b", "v", "g", "d", "e", "zh", "z",
                "i", "j", "k", "l", "m", "n", "o", "p",
                "r", "s", "t", "u", "f", "h", "c", "ch",
                "sh", "shh", "_", "y", "_", "eh", "yu", "ya"}).
589
590
%% @doc Replace every occurrence of the string S1 inside String with S2.
%% Matching is left to right and non-overlapping; replaced text is not
%% scanned again. An empty S1 matches nothing, so String is returned as is.
%% All three arguments must be flat lists.
%% Copyright 2008 Rusty Klophaus  (Nitrogen, MIT License)
replace([], _, _) -> [];
replace(String, [], S2) when is_list(String), is_list(S2) ->
    %% An empty search string is a prefix of everything and consumes no
    %% input, so searching for it would never finish.
    String;
replace(String, S1, S2) when is_list(String), is_list(S1), is_list(S2) ->
    replace1(String, S1, length(S1), S2).

%% Len is length(S1), computed once instead of on every step.
replace1([], _S1, _Len, _S2) ->
    [];
replace1([H|T] = String, S1, Len, S2) ->
    case lists:prefix(S1, String) of
        true -> S2 ++ replace1(lists:nthtail(Len, String), S1, Len, S2);
        false -> [H|replace1(T, S1, Len, S2)]
    end.
602
603
%% @doc Truncate a string to at most N characters, cutting back to the last
%% word or sentence boundary seen. Nothing is appended at the cut; use
%% truncate/3 to supply a marker.
%% @spec truncate(String, int()) -> String
truncate(L, N) ->
    truncate(L, N, "").

%% @doc Truncate a string to at most N characters. An HTML entity (from `&'
%% up to and including `;') and a UTF-8 multi-byte sequence each count as one
%% character. When the text is cut, the result runs up to the last break
%% point and Append is added, except when that break point is the end of a
%% sentence. A string that fits is returned whole, without Append.
%% Binaries are converted with z_convert:to_list/1; the result is a list.
truncate(B, N, Append) when is_binary(B) ->
    truncate(z_convert:to_list(B), N, Append);
truncate(_L, N, _Append) when N =< 0 ->
    [];
truncate(L, N, Append) ->
    truncate(L, N, Append, in_word, [], in_word, []).


%% Scanner state:
%%   Acc       - everything consumed so far, reversed
%%   AccState  - what the most recent character was: in_word | word | sentence
%%   Last      - value of Acc at the most recent break point, [] if none yet
%%   LastState - kind of that break point: in_word | word | sentence
truncate([], _N, _Append, _LastState, _Last, _AccState, Acc) ->
    lists:reverse(Acc);
truncate(_Input, 0, Append, LastState, Last, _AccState, Acc) ->
    case {LastState, Last} of
        {sentence, _} -> lists:reverse(Last);
        {_, []} -> lists:reverse(insert_acc(Append, Acc));
        {_, _} -> lists:reverse(insert_acc(Append, Last))
    end;
truncate(Input, N, Append, LastState, Last, AccState, Acc) ->
    {Class, Rest, Acc1} = truncate_take(Input, Acc),
    {LastState1, Last1, AccState1} =
        truncate_step(Class, AccState, LastState, Last, Acc, Acc1),
    truncate(Rest, N-1, Append, LastState1, Last1, AccState1, Acc1).

%% Consume one logical character from the input and classify it.
%% Returns {Class, RestOfInput, AccWithTheCharacterPushed}.
truncate_take([C|Rest], Acc) when C == $.; C == $!; C == $? ->
    {sentence_end, Rest, [C|Acc]};
truncate_take([C|Rest], Acc) when C == $;; C == $-; C == $, ->
    {clause_break, Rest, [C|Acc]};
truncate_take([C|Rest], Acc)
    when C == 32; C == 9; C == 10; C == 13; C == $/; C == $|; C == $(; C == $); C == $" ->
    {word_break, Rest, [C|Acc]};
truncate_take([$&|_] = Input, Acc) ->
    {Rest, Acc1} = get_entity(Input, Acc),
    {entity, Rest, Acc1};
%% 2-byte sequence; 192..193 are overlong lead bytes but are taken the same way
truncate_take([X,A|Rest], Acc) when X >= 192, X =< 193; X >= 194, X =< 223 ->
    {char, Rest, [A,X|Acc]};
%% 3-byte sequence
truncate_take([X,A,B|Rest], Acc) when X >= 224, X =< 239 ->
    {char, Rest, [B,A,X|Acc]};
%% 4-byte sequence; 245..247 are outside RFC 3629 but are taken the same way
truncate_take([X,A,B,C|Rest], Acc) when X >= 240, X =< 244; X >= 245, X =< 247 ->
    {char, Rest, [C,B,A,X|Acc]};
%% 5-byte sequence (restricted by RFC 3629)
truncate_take([X,A,B,C,D|Rest], Acc) when X >= 248, X =< 251 ->
    {char, Rest, [D,C,B,A,X|Acc]};
%% 6-byte sequence (restricted by RFC 3629)
truncate_take([X,A,B,C,D,E|Rest], Acc) when X >= 252, X =< 253 ->
    {char, Rest, [E,D,C,B,A,X|Acc]};
%% Any other character
truncate_take([C|Rest], Acc) ->
    {char, Rest, [C|Acc]}.

%% State transition for one consumed character. Acc0 is the accumulator
%% before the character, Acc1 after it.
%% Returns {NewLastState, NewLast, NewAccState}.
truncate_step(char, _AccState, LastState, Last, _Acc0, _Acc1) ->
    {LastState, Last, in_word};
truncate_step(sentence_end, AccState, _LastState, _Last, _Acc0, Acc1)
    when AccState =:= in_word; AccState =:= word ->
    {sentence, Acc1, sentence};
truncate_step(sentence_end, _AccState, LastState, Last, _Acc0, _Acc1) ->
    {LastState, Last, sentence};
truncate_step(clause_break, in_word, _LastState, _Last, Acc0, _Acc1) ->
    {sentence, Acc0, word};
truncate_step(word_break, in_word, _LastState, _Last, Acc0, _Acc1) ->
    {word, Acc0, word};
truncate_step(entity, in_word, _LastState, _Last, _Acc0, Acc1) ->
    {word, Acc1, word};
truncate_step(_Break, _AccState, LastState, Last, _Acc0, _Acc1) ->
    {LastState, Last, word}.

%% Push the characters of a string onto a reversed accumulator.
insert_acc([], Acc) ->
    Acc;
insert_acc([H|T], Acc) ->
    insert_acc(T, [H|Acc]).

%% Push everything up to and including the first `;' onto the accumulator.
%% Without a `;' the whole remaining input is consumed.
get_entity([], Acc) ->
    {[], Acc};
get_entity([$;|Rest], Acc) ->
    {Rest, [$;|Acc]};
get_entity([C|Rest], Acc) ->
    get_entity(Rest, [C|Acc]).
688
689
%% @doc Truncate a string after the given number of words. Nothing is
%% appended at the cut; use truncatewords/3 to supply a marker.
truncatewords(S, Words) ->
    truncatewords(S, Words, "").

%% @doc Truncate a string after the given number of words and add Append at
%% the cut. A string with fewer words is returned whole, without Append.
%% Binaries are converted with z_convert:to_list/1; the result is a list.
truncatewords(S, Words, Append) when is_binary(S) ->
    truncatewords(z_convert:to_list(S), in_space, Words, Append, []);
truncatewords(S, Words, Append) when is_list(S) ->
    truncatewords(S, in_space, Words, Append, []).

%% Acc holds the consumed text in reverse order. Words counts down each time
%% a separator ends a word; at zero the separators at the head of Acc are
%% trimmed with trim_left_func/2 and Append is attached.
truncatewords(_S, _State, 0, Append, Acc) ->
    Trimmed = trim_left_func(Acc, fun iswordsep/1),
    lists:reverse(Trimmed) ++ Append;
truncatewords([], _State, _Words, _Append, Acc) ->
    lists:reverse(Acc);
truncatewords([C|Rest], State, Words, Append, Acc) ->
    {State1, Words1} =
        case {State, iswordsep(C)} of
            {in_space, true} -> {in_space, Words};
            {in_space, false} -> {in_word, Words};
            {in_word, true} -> {in_space, Words-1};
            {in_word, false} -> {in_word, Words}
        end,
    truncatewords(Rest, State1, Words1, Append, [C|Acc]).

%% True for the characters that separate words: space, line feed, carriage
%% return, tab, comma, colon and semicolon.
iswordsep(C) ->
    lists:member(C, [$\s, $\n, $\r, $\t, $,, $:, $;]).
720
721
%% @doc Split a binary into a list of line binaries. A line ends at \r\n, at a
%% lone \r or at a lone \n; the separators themselves are dropped. The result
%% always holds at least one (possibly empty) binary, and input that ends in a
%% separator yields a trailing empty binary.
split_lines(B) when is_binary(B) ->
    split_lines(B, <<>>, []).

    %% Current is the line being collected, Done the finished lines, newest first.
    %% The \r\n clause has to come before the lone \r clause.
    split_lines(<<"\r\n", Tail/binary>>, Current, Done) ->
        split_lines(Tail, <<>>, [Current | Done]);
    split_lines(<<"\r", Tail/binary>>, Current, Done) ->
        split_lines(Tail, <<>>, [Current | Done]);
    split_lines(<<"\n", Tail/binary>>, Current, Done) ->
        split_lines(Tail, <<>>, [Current | Done]);
    split_lines(<<Byte, Tail/binary>>, Current, Done) ->
        split_lines(Tail, <<Current/binary, Byte>>, Done);
    split_lines(<<>>, Current, Done) ->
        lists:reverse(Done, [Current]).
736
737
%% @doc Escape special characters for ical RFC2445 elements.
%% Accepts an iolist, a binary or an atom and returns a binary in which:
%% CRLF and LF become the two characters backslash-n, a tab becomes a space,
%% double quote, comma, semicolon and backslash get a leading backslash, and a
%% colon is wrapped in double quotes. Long values are folded by inserting
%% CRLF followed by a space once 70 or more bytes were written on the line.
escape_ical(L) when is_list(L) ->
    escape_ical(iolist_to_binary(L));
escape_ical(B) when is_binary(B) ->
    escape_ical(B, <<>>, 0);
escape_ical(A) when is_atom(A) ->
    escape_ical(atom_to_list(A)).

    %% Acc is the output so far, N the number of bytes written since the last fold.
    escape_ical(<<>>, Acc, _N) -> Acc;
    %% Fold the line, but never directly in front of a UTF-8 continuation byte
    %% (2#10xxxxxx): folding there would cut a multi-byte character in two. Such
    %% bytes fall through to the last clause and the fold happens right after
    %% the character is complete.
    escape_ical(<<C, _/binary>> = B, Acc, N) when N >= 70, (C band 16#C0) =/= 16#80 ->
        escape_ical(B, <<Acc/binary, 13, 10, 32>>, 0);
    escape_ical(<<13, 10, Rest/binary>>, Acc, N) -> escape_ical(Rest, <<Acc/binary, $\\, $n>>, N+2);
    escape_ical(<<10, Rest/binary>>, Acc, N) -> escape_ical(Rest, <<Acc/binary, $\\, $n>>, N+2);
    escape_ical(<<9, Rest/binary>>, Acc, N) -> escape_ical(Rest, <<Acc/binary, 32>>, N+1);
    escape_ical(<<$", Rest/binary>>, Acc, N) -> escape_ical(Rest, <<Acc/binary, $\\, $">>, N+2);
    escape_ical(<<$,, Rest/binary>>, Acc, N) -> escape_ical(Rest, <<Acc/binary, $\\, $,>>, N+2);
    escape_ical(<<$:, Rest/binary>>, Acc, N) -> escape_ical(Rest, <<Acc/binary, $", $:, $">>, N+3);
    escape_ical(<<$;, Rest/binary>>, Acc, N) -> escape_ical(Rest, <<Acc/binary, $\\, $;>>, N+2);
    escape_ical(<<$\\, Rest/binary>>, Acc, N) -> escape_ical(Rest, <<Acc/binary, $\\, $\\>>, N+2);
    escape_ical(<<C, Rest/binary>>, Acc, N) -> escape_ical(Rest, <<Acc/binary, C>>, N+1).
757
%% @doc Return true if the first argument is a prefix of the second.
%% Two binaries are compared directly; any other input is first converted
%% with iolist_to_binary/1.
%% @spec starts_with(String, String) -> bool()
starts_with(Prefix, Subject) when is_binary(Prefix), is_binary(Subject) ->
    PrefixSize = byte_size(Prefix),
    case Subject of
        <<Head:PrefixSize/binary, _/binary>> -> Head =:= Prefix;
        _TooShort -> false
    end;
starts_with(Prefix, Subject) ->
    starts_with(iolist_to_binary(Prefix), iolist_to_binary(Subject)).
768
769
%% @doc Return true iff the second argument ends with the first.
%% Two binaries are compared directly; any other input is first converted
%% with iolist_to_binary/1.
%% @spec ends_with(String, String) -> bool()
ends_with(Suffix, Subject) when is_binary(Suffix), is_binary(Subject) ->
    %% Number of bytes in front of the candidate suffix. It is negative when
    %% Suffix is longer than Subject, in which case the pattern cannot match.
    Skip = byte_size(Subject) - byte_size(Suffix),
    case Subject of
        <<_:Skip/binary, Tail/binary>> -> Tail =:= Suffix;
        _ -> false
    end;
ends_with(Suffix, Subject) ->
    ends_with(iolist_to_binary(Suffix), iolist_to_binary(Subject)).
780
781
%% @doc Return true iff the first argument occurs somewhere in the second.
%% Two binaries are searched directly; any other input is first converted
%% with iolist_to_binary/1. An empty search term is always found.
%% @spec contains(String, String) -> bool()
contains(What, B) when is_binary(What), is_binary(B) ->
    contains(What, byte_size(What), B, 0);
contains(What, String) ->
    contains(iolist_to_binary(What), iolist_to_binary(String)).

    %% Try every byte offset from Offset up to and including the size of
    %% Haystack, and stop at the first offset where Needle is found.
    contains(Needle, NeedleSize, Haystack, Offset) when Offset =< byte_size(Haystack) ->
        case Haystack of
            <<_:Offset/binary, Found:NeedleSize/binary, _/binary>> when Found =:= Needle ->
                true;
            _ ->
                contains(Needle, NeedleSize, Haystack, Offset + 1)
        end;
    contains(_Needle, _NeedleSize, _Haystack, _Offset) ->
        false.
%% @doc Split a string on a separator string, see http://www.erlang.org/pipermail/erlang-questions/2008-October/038896.html
%% The work is handed to a specialised splitter depending on the length of
%% the separator: empty, exactly one character, or two and more characters.
%% @spec split(String, String) -> list()

split(String, [C1, C2 | SepTail]) when is_integer(C1), is_integer(C2) ->
    split2(String, C1, C2, SepTail);
split(String, [SepChar]) when is_integer(SepChar) ->
    split1(String, SepChar);
split(String, []) ->
    split0(String).
805
%% Split a string at the empty separator "": every character becomes its own
%% one-character string. No empty strings are produced for the positions
%% before the first or after the last character, so "" splits into [].

split0(String) ->
    [[C] || C <- String].
814
%% Split a string at every occurrence of the single character Sep. Empty
%% fields are kept, so a leading, trailing or doubled separator yields "".

split1(String, Sep) ->
    split1_loop(String, Sep, []).

%% Rev holds the characters of the field being collected, in reverse order.
split1_loop([], _Sep, Rev) ->
    [lists:reverse(Rev)];
split1_loop([Sep | Rest], Sep, Rev) ->
    Field = lists:reverse(Rev),
    [Field | split1(Rest, Sep)];
split1_loop([Chr | Rest], Sep, Rev) ->
    split1_loop(Rest, Sep, [Chr | Rev]).
826
%% Split a string at a separator of two or more characters, passed as
%% [C1,C2|L]. The first two separator characters are separate arguments so
%% that a candidate position can be recognised with a plain pattern match.

split2(String, C1, C2, L) ->
    split2_loop(String, C1, C2, L, []).

%% Rev holds the characters of the field being collected, in reverse order.
split2_loop([], _C1, _C2, _L, Rev) ->
    [lists:reverse(Rev)];
split2_loop([C1 | [C2 | AfterPair] = AfterC1], C1, C2, L, Rev) ->
    %% The first two separator characters match; check the rest of it.
    case split_prefix(L, AfterPair) of
        no ->
            split2_loop(AfterC1, C1, C2, L, [C1 | Rev]);
        Rest ->
            [lists:reverse(Rev) | split2(Rest, C1, C2, L)]
    end;
split2_loop([Chr | Rest], C1, C2, L, Rev) ->
    split2_loop(Rest, C1, C2, L, [Chr | Rev]).
843
%% If the first list is a prefix of the second, return what follows that
%% prefix in the second list; otherwise return the atom no.
split_prefix([], Remainder) ->
    Remainder;
split_prefix([C | PrefixTail], [C | StringTail]) ->
    split_prefix(PrefixTail, StringTail);
split_prefix(_, _) ->
    no.
847
%% Ad-hoc self test: returns ok, or crashes with badmatch on the first
%% expectation that does not hold.
%% NOTE(review): the non-ASCII literals below look mis-encoded (the "?"
%% placeholders, and a word that presumably read "üçgen") -- confirm against
%% the original file and its encoding before relying on this test.
test() ->
    Word = "ьзgen",
    Upper = to_upper(Word),
    Word = to_lower(Upper),
    "ucgen" = to_name(Word),

    lists:foreach(
        fun({Expected, Input}) -> Expected = first_char(Input) end,
        [{"a", "aap"}, {"?", "???xx"}, {"?", "?aap"}]),
    ok.