/src/support/z_string.erl

https://code.google.com/p/zotonic/ · Erlang · 856 lines · 675 code · 88 blank · 93 comment · 21 complexity · a31d95872c65a2562832bd04ff89e3c8 MD5 · raw file

  1. %% @author Marc Worrell <marc@worrell.nl>
  2. %% @copyright 2009-2010 Marc Worrell
  3. %% Date: 2009-04-26
  4. %% @doc String related functions
  5. %% @todo Make this UTF-8 safe
  6. %% @todo Check valid chars for filenames, allow chinese, japanese, etc?
  7. %% CJK Unified Ideographs Extension A: Range: 3400-4DBF
  8. %% CJK Unified Ideographs: Range: 4E00-9FAF
  9. %% Kangxi Radicals: Range 2F00-2FDF
  10. %% See also: http://www.utf8-chartable.de/
  11. %% Copyright 2009-2010 Marc Worrell
  12. %%
  13. %% Licensed under the Apache License, Version 2.0 (the "License");
  14. %% you may not use this file except in compliance with the License.
  15. %% You may obtain a copy of the License at
  16. %%
  17. %% http://www.apache.org/licenses/LICENSE-2.0
  18. %%
  19. %% Unless required by applicable law or agreed to in writing, software
  20. %% distributed under the License is distributed on an "AS IS" BASIS,
  21. %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22. %% See the License for the specific language governing permissions and
  23. %% limitations under the License.
  24. -module(z_string).
  25. -author("Marc Worrell <marc@worrell.nl").
  26. %% interface functions
  27. -export([
  28. trim/1,
  29. trim_left/1,
  30. trim_right/1,
  31. trim/2,
  32. trim_left/2,
  33. trim_right/2,
  34. trim_left_func/2,
  35. is_string/1,
  36. first_char/1,
  37. last_char/1,
  38. unquote/1,
  39. unquote/2,
  40. nospaces/1,
  41. line/1,
  42. to_rootname/1,
  43. to_name/1,
  44. to_slug/1,
  45. to_lower/1,
  46. to_upper/1,
  47. replace/3,
  48. truncate/2,
  49. truncate/3,
  50. truncatewords/2,
  51. truncatewords/3,
  52. split_lines/1,
  53. escape_ical/1,
  54. starts_with/2,
  55. ends_with/2,
  56. contains/2,
  57. split/2,
  58. test/0
  59. ]).
  60. -include_lib("include/zotonic.hrl").
  61. %% @doc Remove whitespace at the start and end of the string
  62. trim(B) when is_binary(B) ->
  63. trim_right(trim_left(B));
  64. trim(L) when is_list(L) ->
  65. binary_to_list(trim(iolist_to_binary(L))).
  66. %% @doc Remove all occurences of a character at the start and end of a string.
  67. trim(B, Char) when is_binary(B) ->
  68. trim_right(trim_left(B, Char), Char);
  69. trim(L, Char) when is_list(L) ->
  70. binary_to_list(trim(iolist_to_binary(L), Char)).
  71. %% @doc Remove whitespace at the start the string
  72. trim_left(S) ->
  73. trim_left_func(S, fun(C) -> C =< 32 end).
  74. %% @doc Remove all occurences of a char at the start of a string
  75. trim_left(S, Char) ->
  76. trim_left_func(S, fun(C) -> C == Char end).
  77. trim_left_func(<<Char, Rest/binary>> = Bin, F) ->
  78. case F(Char) of
  79. true -> trim_left_func(Rest, F);
  80. false -> Bin
  81. end;
  82. trim_left_func([Char|Rest] = L, F) when is_integer(Char) ->
  83. case F(Char) of
  84. true -> trim_left(Rest, F);
  85. false -> L
  86. end;
  87. trim_left_func([L|Rest], F) when is_list(L); is_binary(L) ->
  88. case trim_left_func(L, F) of
  89. [] -> trim_left_func(Rest, F);
  90. <<>> -> trim_left_func(Rest, F);
  91. Other -> [Other|Rest]
  92. end;
  93. trim_left_func(Other, _F) ->
  94. Other.
  95. %% @doc Remove whitespace at the end of the string
  96. trim_right(B) when is_binary(B) ->
  97. trim_right(B, <<>>, <<>>);
  98. trim_right(L) ->
  99. binary_to_list(trim_right(iolist_to_binary(L))).
  100. trim_right(<<C, Rest/binary>>, WS, Acc) ->
  101. case C of
  102. W when W =< 32 -> trim_right(Rest, <<WS/binary, C>>, Acc);
  103. _ -> trim_right(Rest, <<>>, <<Acc/binary, WS/binary, C>>)
  104. end;
  105. trim_right(<<>>, _WS, Acc) ->
  106. Acc.
  107. %% @doc Remove all occurences of a char at the end of the string
  108. trim_right(B, Char) when is_binary(B) ->
  109. trim_right(B, Char, <<>>, <<>>);
  110. trim_right(L, Char) ->
  111. binary_to_list(trim_right(iolist_to_binary(L), Char)).
  112. trim_right(<<C, Rest/binary>>, Char, WS, Acc) ->
  113. case C of
  114. Char -> trim_right(Rest, Char, <<WS/binary, C>>, Acc);
  115. _ -> trim_right(Rest, Char, <<>>, <<Acc/binary, WS/binary, C>>)
  116. end;
  117. trim_right(<<>>, _Char, _WS, Acc) ->
  118. Acc.
  119. %% @doc Check if the variable is a one dimensional list, probably a string
  120. is_string([]) ->
  121. true;
  122. is_string([C|Rest]) when
  123. is_integer(C)
  124. andalso C =< 255
  125. andalso (C >= 32 orelse C == 9 orelse C == 10 orelse C == 12 orelse C == 13) ->
  126. is_string(Rest);
  127. is_string(_) ->
  128. false.
  129. %% @doc Return the first character of a string.
  130. %% @todo Make this UTF-8 safe
  131. first_char([]) -> undefined;
  132. first_char([H|T]) when is_integer(H) ->
  133. truncate([H|T], 1, "");
  134. first_char(<<>>) -> undefined;
  135. first_char(<<C, _/binary>>) -> C.
  136. %% @doc Return the last character of a string
  137. last_char([]) -> undefined;
  138. last_char([C]) -> C;
  139. last_char([_|R]) -> last_char(R);
  140. last_char(<<>>) -> undefined;
  141. last_char(<<C>>) -> C;
  142. last_char(<<_, R/binary>>) -> last_char(R).
  143. %% @doc Remove the first and last char if they are double quotes.
  144. unquote(S) ->
  145. unquote(S, $").
  146. unquote(S, Q) ->
  147. case S of
  148. <<Q, R/binary>> -> unquote1(R, <<>>, Q, S);
  149. [Q|R] -> unquote1(R, [], Q, S);
  150. _ -> S
  151. end.
  152. unquote1([], _Acc, _Q, S) -> S;
  153. unquote1([Q], Acc, Q, _S) -> lists:reverse(Acc);
  154. unquote1([H|T], Acc, Q, S) -> unquote1(T, [H|Acc], Q, S);
  155. unquote1(<<>>, _Acc, _Q, S) -> S;
  156. unquote1(<<Q>>, Acc, Q, _S) -> Acc;
  157. unquote1(<<C,R/binary>>, Acc, Q, S) -> unquote1(R, <<Acc/binary, C>>, Q, S).
  158. %% @doc Remove all spaces and control characters from a string.
  159. nospaces(B) when is_binary(B) ->
  160. nospaces(binary_to_list(B));
  161. nospaces(L) ->
  162. nospaces(L, []).
  163. nospaces([], Acc) ->
  164. lists:reverse(Acc);
  165. nospaces([C|Rest], Acc) when C =< 32 ->
  166. nospaces(Rest, Acc);
  167. nospaces([C|Rest], Acc) ->
  168. nospaces(Rest, [C|Acc]).
  169. %% @doc Make sure that the string is on one line only, replace control characters with spaces
  170. line(B) when is_binary(B) ->
  171. line(binary_to_list(B));
  172. line(L) ->
  173. line1(L, []).
  174. line1([], Acc) ->
  175. lists:reverse(Acc);
  176. line1([H|T], Acc) when H < 32 ->
  177. line1(T, [32|Acc]);
  178. line1([H|T], Acc) ->
  179. line1(T, [H|Acc]).
  180. %% @doc Return a lowercase string for the input
  181. %% @spec to_lower(Value) -> String
  182. to_lower(B) when is_binary(B) ->
  183. to_lower(binary_to_list(B));
  184. to_lower(A) when is_atom(A) ->
  185. to_lower(atom_to_list(A));
  186. to_lower(L) when is_list(L) ->
  187. to_lower(lists:flatten(L), []).
  %% to_lower/2: worker for to_lower/1. Walks a flat character list and builds
  %% the result in reverse in Acc; the final clause order matters because the
  %% last clause copies any element that no earlier clause matched.
  %%
  %% Every non-ASCII clause pushes two integers onto the reversed accumulator,
  %% so after lists:reverse/1 the output holds e.g. 195,165 - presumably the
  %% two UTF-8 bytes of the lowercase letter (TODO confirm).
  %%
  %% NOTE(review): the string literals in the patterns below look damaged by a
  %% character-set conversion. Many are shown as "?" - as written those
  %% clauses all share one pattern, so only the first of them could ever
  %% match - and the rest display as letters that do not correspond to the
  %% emitted integers. Restore them from a correctly encoded copy before
  %% relying on this table.
  188. to_lower([], Acc) -> lists:reverse(Acc);
  %% ASCII A-Z: adding 32 gives the matching a-z.
  189. to_lower([H|T], Acc) when H >= $A andalso H =< $Z -> to_lower(T, [H+32|Acc]);
  190. to_lower("Е"++T, Acc) -> to_lower(T, [165,195|Acc]);
  191. to_lower("Д"++T, Acc) -> to_lower(T, [164,195|Acc]);
  192. to_lower("Б"++T, Acc) -> to_lower(T, [161,195|Acc]);
  193. to_lower("А"++T, Acc) -> to_lower(T, [160,195|Acc]);
  194. to_lower("Л"++T, Acc) -> to_lower(T, [171,195|Acc]);
  195. to_lower("К"++T, Acc) -> to_lower(T, [170,195|Acc]);
  196. to_lower("Й"++T, Acc) -> to_lower(T, [169,195|Acc]);
  197. to_lower("И"++T, Acc) -> to_lower(T, [168,195|Acc]);
  198. to_lower("П"++T, Acc) -> to_lower(T, [175,195|Acc]);
  199. to_lower("О"++T, Acc) -> to_lower(T, [174,195|Acc]);
  200. to_lower("Н"++T, Acc) -> to_lower(T, [173,195|Acc]);
  201. to_lower("М"++T, Acc) -> to_lower(T, [172,195|Acc]);
  202. to_lower("Ь"++T, Acc) -> to_lower(T, [188,195|Acc]);
  203. to_lower("Ы"++T, Acc) -> to_lower(T, [187,195|Acc]);
  204. to_lower("Ъ"++T, Acc) -> to_lower(T, [186,195|Acc]);
  205. to_lower("Щ"++T, Acc) -> to_lower(T, [185,195|Acc]);
  206. to_lower("Ц"++T, Acc) -> to_lower(T, [182,195|Acc]);
  207. to_lower("Ф"++T, Acc) -> to_lower(T, [180,195|Acc]);
  208. to_lower("У"++T, Acc) -> to_lower(T, [179,195|Acc]);
  209. to_lower("Т"++T, Acc) -> to_lower(T, [178,195|Acc]);
  210. to_lower("Ш"++T, Acc) -> to_lower(T, [184,195|Acc]);
  211. to_lower("З"++T, Acc) -> to_lower(T, [167,195|Acc]);
  212. to_lower("Ж"++T, Acc) -> to_lower(T, [166,195|Acc]);
  213. to_lower("Њ"++T, Acc) -> to_lower(T, [147,197|Acc]);
  214. % Cyrillic support
  215. to_lower("?"++T, Acc) -> to_lower(T, [176,208|Acc]);
  216. to_lower("?"++T, Acc) -> to_lower(T, [177,208|Acc]);
  217. to_lower("?"++T, Acc) -> to_lower(T, [178,208|Acc]);
  218. to_lower("?"++T, Acc) -> to_lower(T, [179,208|Acc]);
  219. to_lower("?"++T, Acc) -> to_lower(T, [180,208|Acc]);
  220. to_lower("?"++T, Acc) -> to_lower(T, [181,208|Acc]);
  221. to_lower("?"++T, Acc) -> to_lower(T, [145,209|Acc]);
  222. to_lower("?"++T, Acc) -> to_lower(T, [182,208|Acc]);
  223. to_lower("?"++T, Acc) -> to_lower(T, [183,208|Acc]);
  224. to_lower("?"++T, Acc) -> to_lower(T, [184,208|Acc]);
  225. to_lower("?"++T, Acc) -> to_lower(T, [185,208|Acc]);
  226. to_lower("?"++T, Acc) -> to_lower(T, [186,208|Acc]);
  227. to_lower("?"++T, Acc) -> to_lower(T, [187,208|Acc]);
  228. to_lower("?"++T, Acc) -> to_lower(T, [188,208|Acc]);
  229. to_lower("?"++T, Acc) -> to_lower(T, [189,208|Acc]);
  230. to_lower("?"++T, Acc) -> to_lower(T, [190,208|Acc]);
  231. to_lower("?"++T, Acc) -> to_lower(T, [191,208|Acc]);
  232. to_lower("?"++T, Acc) -> to_lower(T, [128,209|Acc]);
  233. to_lower("?"++T, Acc) -> to_lower(T, [129,209|Acc]);
  234. to_lower("?"++T, Acc) -> to_lower(T, [130,209|Acc]);
  235. to_lower("?"++T, Acc) -> to_lower(T, [131,209|Acc]);
  236. to_lower("?"++T, Acc) -> to_lower(T, [132,209|Acc]);
  237. to_lower("?"++T, Acc) -> to_lower(T, [133,209|Acc]);
  238. to_lower("?"++T, Acc) -> to_lower(T, [134,209|Acc]);
  239. to_lower("?"++T, Acc) -> to_lower(T, [135,209|Acc]);
  240. to_lower("?"++T, Acc) -> to_lower(T, [136,209|Acc]);
  241. to_lower("?"++T, Acc) -> to_lower(T, [137,209|Acc]);
  242. to_lower("?"++T, Acc) -> to_lower(T, [138,209|Acc]);
  243. to_lower("?"++T, Acc) -> to_lower(T, [139,209|Acc]);
  244. to_lower("?"++T, Acc) -> to_lower(T, [140,209|Acc]);
  245. to_lower("?"++T, Acc) -> to_lower(T, [141,209|Acc]);
  246. to_lower("?"++T, Acc) -> to_lower(T, [142,209|Acc]);
  247. to_lower("?"++T, Acc) -> to_lower(T, [143,209|Acc]);
  248. % Extra Ukrainian characters
  249. to_lower("?"++T, Acc) -> to_lower(T, [145,210|Acc]);
  250. to_lower("?"++T, Acc) -> to_lower(T, [151,209|Acc]);
  251. to_lower("?"++T, Acc) -> to_lower(T, [150,209|Acc]);
  252. to_lower("?"++T, Acc) -> to_lower(T, [148,209|Acc]);
  253. % Polish support
  254. to_lower("?"++T, Acc) -> to_lower(T, [133,196|Acc]);
  255. to_lower("?"++T, Acc) -> to_lower(T, [153,196|Acc]);
  256. to_lower("?"++T, Acc) -> to_lower(T, [135,196|Acc]);
  257. to_lower("?"++T, Acc) -> to_lower(T, [130,197|Acc]);
  258. to_lower("?"++T, Acc) -> to_lower(T, [132,197|Acc]);
  259. to_lower("?"++T, Acc) -> to_lower(T, [155,197|Acc]);
  260. to_lower("?"++T, Acc) -> to_lower(T, [186,197|Acc]);
  261. to_lower("?"++T, Acc) -> to_lower(T, [188,197|Acc]);
  262. % Turkish support
  263. to_lower("?"++T, Acc) -> to_lower(T, [159,197|Acc]);
  264. to_lower("?"++T, Acc) -> to_lower(T, [159,196|Acc]);
  265. to_lower("?"++T, Acc) -> to_lower(T, [177,196|Acc]);
  266. % Other characters are taken as-is
  267. to_lower([H|T], Acc) -> to_lower(T, [H|Acc]).
  268. %% @doc Return a uppercase string for the input
  269. %% @spec to_upper(Value) -> String
  270. to_upper(B) when is_binary(B) ->
  271. to_upper(binary_to_list(B));
  272. to_upper(A) when is_atom(A) ->
  273. to_upper(atom_to_list(A));
  274. to_upper(L) when is_list(L) ->
  275. to_upper(lists:flatten(L), []).
  %% to_upper/2: worker for to_upper/1. Walks a flat character list and builds
  %% the result in reverse in Acc; the final clause order matters because the
  %% last clause copies any element that no earlier clause matched.
  %%
  %% Every non-ASCII clause pushes two integers onto the reversed accumulator,
  %% so after lists:reverse/1 the output holds e.g. 195,133 - presumably the
  %% two UTF-8 bytes of the uppercase letter (TODO confirm).
  %%
  %% NOTE(review): the string literals in the patterns below look damaged by a
  %% character-set conversion. Many are shown as "?" - as written those
  %% clauses all share one pattern, so only the first of them could ever
  %% match - and the rest display as letters that do not correspond to the
  %% emitted integers. Restore them from a correctly encoded copy before
  %% relying on this table.
  276. to_upper([], Acc) -> lists:reverse(Acc);
  %% ASCII a-z: subtracting 32 gives the matching A-Z.
  277. to_upper([H|T], Acc) when H >= $a andalso H =< $z -> to_upper(T, [H-32|Acc]);
  278. to_upper("е"++T, Acc) -> to_upper(T, [133,195|Acc]);
  279. to_upper("д"++T, Acc) -> to_upper(T, [132,195|Acc]);
  280. to_upper("б"++T, Acc) -> to_upper(T, [129,195|Acc]);
  281. to_upper("а"++T, Acc) -> to_upper(T, [128,195|Acc]);
  282. to_upper("л"++T, Acc) -> to_upper(T, [139,195|Acc]);
  283. to_upper("к"++T, Acc) -> to_upper(T, [138,195|Acc]);
  284. to_upper("й"++T, Acc) -> to_upper(T, [137,195|Acc]);
  285. to_upper("и"++T, Acc) -> to_upper(T, [136,195|Acc]);
  286. to_upper("п"++T, Acc) -> to_upper(T, [143,195|Acc]);
  %% NOTE(review): unlike its neighbours, the next pattern is displayed as an
  %% upper-case letter. If that is also true in the real source, the
  %% corresponding lower-case letter is never upper-cased - verify.
  287. to_upper("О"++T, Acc) -> to_upper(T, [142,195|Acc]);
  288. to_upper("н"++T, Acc) -> to_upper(T, [141,195|Acc]);
  289. to_upper("м"++T, Acc) -> to_upper(T, [140,195|Acc]);
  290. to_upper("ь"++T, Acc) -> to_upper(T, [156,195|Acc]);
  291. to_upper("ы"++T, Acc) -> to_upper(T, [155,195|Acc]);
  292. to_upper("ъ"++T, Acc) -> to_upper(T, [154,195|Acc]);
  293. to_upper("щ"++T, Acc) -> to_upper(T, [153,195|Acc]);
  294. to_upper("ц"++T, Acc) -> to_upper(T, [150,195|Acc]);
  295. to_upper("ф"++T, Acc) -> to_upper(T, [148,195|Acc]);
  296. to_upper("у"++T, Acc) -> to_upper(T, [147,195|Acc]);
  297. to_upper("т"++T, Acc) -> to_upper(T, [146,195|Acc]);
  298. to_upper("ш"++T, Acc) -> to_upper(T, [152,195|Acc]);
  299. to_upper("з"++T, Acc) -> to_upper(T, [135,195|Acc]);
  300. to_upper("ж"++T, Acc) -> to_upper(T, [134,195|Acc]);
  301. to_upper("њ"++T, Acc) -> to_upper(T, [146,197|Acc]);
  302. % Cyrillic support
  303. to_upper("?"++T, Acc) -> to_upper(T, [144,208|Acc]);
  304. to_upper("?"++T, Acc) -> to_upper(T, [145,208|Acc]);
  305. to_upper("?"++T, Acc) -> to_upper(T, [146,208|Acc]);
  306. to_upper("?"++T, Acc) -> to_upper(T, [147,208|Acc]);
  307. to_upper("?"++T, Acc) -> to_upper(T, [148,208|Acc]);
  308. to_upper("?"++T, Acc) -> to_upper(T, [149,208|Acc]);
  309. to_upper("?"++T, Acc) -> to_upper(T, [129,208|Acc]);
  310. to_upper("?"++T, Acc) -> to_upper(T, [150,208|Acc]);
  311. to_upper("?"++T, Acc) -> to_upper(T, [151,208|Acc]);
  312. to_upper("?"++T, Acc) -> to_upper(T, [152,208|Acc]);
  313. to_upper("?"++T, Acc) -> to_upper(T, [153,208|Acc]);
  314. to_upper("?"++T, Acc) -> to_upper(T, [154,208|Acc]);
  315. to_upper("?"++T, Acc) -> to_upper(T, [155,208|Acc]);
  316. to_upper("?"++T, Acc) -> to_upper(T, [156,208|Acc]);
  317. to_upper("?"++T, Acc) -> to_upper(T, [157,208|Acc]);
  318. to_upper("?"++T, Acc) -> to_upper(T, [158,208|Acc]);
  319. to_upper("?"++T, Acc) -> to_upper(T, [159,208|Acc]);
  320. to_upper("?"++T, Acc) -> to_upper(T, [160,208|Acc]);
  321. to_upper("?"++T, Acc) -> to_upper(T, [161,208|Acc]);
  322. to_upper("?"++T, Acc) -> to_upper(T, [162,208|Acc]);
  323. to_upper("?"++T, Acc) -> to_upper(T, [163,208|Acc]);
  324. to_upper("?"++T, Acc) -> to_upper(T, [164,208|Acc]);
  325. to_upper("?"++T, Acc) -> to_upper(T, [165,208|Acc]);
  326. to_upper("?"++T, Acc) -> to_upper(T, [166,208|Acc]);
  327. to_upper("?"++T, Acc) -> to_upper(T, [167,208|Acc]);
  328. to_upper("?"++T, Acc) -> to_upper(T, [168,208|Acc]);
  329. to_upper("?"++T, Acc) -> to_upper(T, [169,208|Acc]);
  330. to_upper("?"++T, Acc) -> to_upper(T, [170,208|Acc]);
  331. to_upper("?"++T, Acc) -> to_upper(T, [171,208|Acc]);
  332. to_upper("?"++T, Acc) -> to_upper(T, [172,208|Acc]);
  333. to_upper("?"++T, Acc) -> to_upper(T, [173,208|Acc]);
  334. to_upper("?"++T, Acc) -> to_upper(T, [174,208|Acc]);
  335. to_upper("?"++T, Acc) -> to_upper(T, [175,208|Acc]);
  336. % Extra Ukrainian characters
  337. to_upper("?"++T, Acc) -> to_upper(T, [144,210|Acc]);
  338. to_upper("?"++T, Acc) -> to_upper(T, [135,208|Acc]);
  339. to_upper("?"++T, Acc) -> to_upper(T, [143,208|Acc]);
  340. to_upper("?"++T, Acc) -> to_upper(T, [132,208|Acc]);
  341. % Polish support
  342. to_upper("?"++T, Acc) -> to_upper(T, [132,196|Acc]);
  343. to_upper("?"++T, Acc) -> to_upper(T, [152,196|Acc]);
  344. to_upper("?"++T, Acc) -> to_upper(T, [134,196|Acc]);
  345. to_upper("?"++T, Acc) -> to_upper(T, [129,197|Acc]);
  346. to_upper("?"++T, Acc) -> to_upper(T, [131,197|Acc]);
  347. to_upper("?"++T, Acc) -> to_upper(T, [154,197|Acc]);
  348. to_upper("?"++T, Acc) -> to_upper(T, [185,197|Acc]);
  349. to_upper("?"++T, Acc) -> to_upper(T, [187,197|Acc]);
  350. % Turkish support
  351. to_upper("?"++T, Acc) -> to_upper(T, [158,197|Acc]);
  352. to_upper("?"++T, Acc) -> to_upper(T, [158,196|Acc]);
  353. to_upper("?"++T, Acc) -> to_upper(T, [176,196|Acc]);
  354. % Other chars are taken as-is
  355. to_upper([H|T], Acc) -> to_upper(T, [H|Acc]).
  356. %% @doc Filter a filename so that we obtain a basename that is safe to use.
  357. %% @spec to_rootname(string()) -> string()
  358. to_rootname(Filename) ->
  359. to_slug(filename:rootname(filename:basename(Filename))).
  360. %% @doc Map a string to a slug that can be used in the uri of a page. Same as a name, but then with dashes instead of underscores.
  361. %% @spec to_slug(String) -> String
  362. to_slug(Title) ->
  363. Slug = to_name(Title),
  364. [ case C of $_ -> $-; _ -> C end || C <- Slug ].
  365. %% @doc Map a string to a value that can be used as a name or slug. Maps all characters to lowercase and remove non digalpha chars
  366. %% @spec to_name(String) -> String
  367. to_name({trans, Tr}) ->
  368. case proplists:get_value(en, Tr) of
  369. undefined ->
  370. case Tr of
  371. [{_,V}|_] -> to_name(V);
  372. _ -> to_name([])
  373. end;
  374. V -> to_name(V)
  375. end;
  376. to_name(Name) when is_binary(Name) ->
  377. to_name(binary_to_list(Name));
  378. to_name(Name) when is_atom(Name) ->
  379. to_name(atom_to_list(Name));
  380. to_name(Name) ->
  381. to_name(Name, [], 0).
  %% to_name/3: worker for to_name/1.
  %%   arg 1 - remaining input characters
  %%   Acc   - output built so far, in reverse order
  %%   I     - progress counter; conversion stops once it reaches 80
  %%
  %% The counter is tested before each step and a single step can add up to
  %% four characters, so the result can end up slightly longer than 80.
  %% The catch-all clauses at the bottom also increase the counter when they
  %% drop a character without emitting anything.
  %%
  %% NOTE(review): the non-ASCII string literals in the patterns below look
  %% damaged by a character-set conversion. Many are shown as "?" - as written
  %% those clauses all share one pattern, so only the first of them could ever
  %% match - and the others display as letters unrelated to their
  %% replacement. Restore them from a correctly encoded copy.
  %%
  %% End of input: strip underscores from both ends; an empty result becomes "_".
  382. to_name([], Acc, _I) ->
  383. case string:strip(lists:reverse(Acc), both, $_) of
  384. [] -> "_";
  385. Name -> Name
  386. end;
  %% Length limit reached: ignore the rest of the input and finish.
  387. to_name(_, Acc, N) when N >= 80 ->
  388. to_name([], Acc, 80);
  %% ASCII upper case is lowercased by adding 32.
  389. to_name([C|T], Acc, I) when C >= $A andalso C =< $Z ->
  390. to_name(T, [C+32|Acc], I+1);
  %% a-z, 0-9 and underscore are copied unchanged (underscores coming from the
  %% input are therefore not collapsed).
  391. to_name([C|T], Acc, I) when (C >= $a andalso C =< $z) orelse (C >= $0 andalso C =< $9) orelse C =:= $_ ->
  392. to_name(T, [C|Acc], I+1);
  %% Accented letters are replaced by an unaccented ASCII letter (or two).
  393. to_name("д"++T, Acc, I) -> to_name(T, [$a|Acc], I+1);
  394. to_name("л"++T, Acc, I) -> to_name(T, [$e|Acc], I+1);
  395. to_name("п"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  396. to_name("ь"++T, Acc, I) -> to_name(T, [$u|Acc], I+1);
  397. to_name("ц"++T, Acc, I) -> to_name(T, [$o|Acc], I+1);
  398. to_name("Д"++T, Acc, I) -> to_name(T, [$a|Acc], I+1);
  399. to_name("Л"++T, Acc, I) -> to_name(T, [$e|Acc], I+1);
  400. to_name("П"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  401. to_name("Ь"++T, Acc, I) -> to_name(T, [$u|Acc], I+1);
  402. to_name("Ц"++T, Acc, I) -> to_name(T, [$o|Acc], I+1);
  403. to_name("й"++T, Acc, I) -> to_name(T, [$e|Acc], I+1);
  404. to_name("и"++T, Acc, I) -> to_name(T, [$e|Acc], I+1);
  405. to_name("Й"++T, Acc, I) -> to_name(T, [$e|Acc], I+1);
  406. to_name("И"++T, Acc, I) -> to_name(T, [$e|Acc], I+1);
  407. to_name("н"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  408. to_name("м"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  409. to_name("Н"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  410. to_name("М"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  411. to_name("ъ"++T, Acc, I) -> to_name(T, [$u|Acc], I+1);
  412. to_name("щ"++T, Acc, I) -> to_name(T, [$u|Acc], I+1);
  413. to_name("Ъ"++T, Acc, I) -> to_name(T, [$u|Acc], I+1);
  414. to_name("Щ"++T, Acc, I) -> to_name(T, [$u|Acc], I+1);
  415. to_name("у"++T, Acc, I) -> to_name(T, [$o|Acc], I+1);
  416. to_name("т"++T, Acc, I) -> to_name(T, [$o|Acc], I+1);
  417. to_name("У"++T, Acc, I) -> to_name(T, [$o|Acc], I+1);
  418. to_name("Т"++T, Acc, I) -> to_name(T, [$o|Acc], I+1);
  419. to_name("Я"++T, Acc, I) -> to_name(T, [$s,$s|Acc], I+2);
  420. to_name("з"++T, Acc, I) -> to_name(T, [$c|Acc], I+1);
  421. to_name("З"++T, Acc, I) -> to_name(T, [$c|Acc], I+1);
  422. to_name("ш"++T, Acc, I) -> to_name(T, [$o|Acc], I+1);
  423. to_name("Ш"++T, Acc, I) -> to_name(T, [$o|Acc], I+1);
  424. to_name("е"++T, Acc, I) -> to_name(T, [$a|Acc], I+1);
  425. to_name("Е"++T, Acc, I) -> to_name(T, [$a|Acc], I+1);
  426. to_name("Ђ"++T, Acc, I) -> to_name(T, [$e|Acc], I+1);
  427. to_name("я"++T, Acc, I) -> to_name(T, [$i,$j|Acc], I+2);
  %% "@" is spelled out as "_at_" (pushed reversed onto Acc).
  428. to_name("@"++T, Acc, I) -> to_name(T, [$_,$t,$a,$_|Acc], I+4);
  429. % Cyrillic support (from http://en.wikipedia.org/wiki/Romanization_of_Russian)
  430. to_name("?"++T, Acc, I) -> to_name(T, [$a|Acc], I+1);
  431. to_name("?"++T, Acc, I) -> to_name(T, [$a|Acc], I+1);
  432. to_name("?"++T, Acc, I) -> to_name(T, [$b|Acc], I+1);
  433. to_name("?"++T, Acc, I) -> to_name(T, [$b|Acc], I+1);
  434. to_name("?"++T, Acc, I) -> to_name(T, [$v|Acc], I+1);
  435. to_name("?"++T, Acc, I) -> to_name(T, [$v|Acc], I+1);
  436. to_name("?"++T, Acc, I) -> to_name(T, [$g|Acc], I+1);
  437. to_name("?"++T, Acc, I) -> to_name(T, [$g|Acc], I+1);
  438. to_name("?"++T, Acc, I) -> to_name(T, [$d|Acc], I+1);
  439. to_name("?"++T, Acc, I) -> to_name(T, [$d|Acc], I+1);
  440. to_name("?"++T, Acc, I) -> to_name(T, [$e|Acc], I+1);
  441. to_name("?"++T, Acc, I) -> to_name(T, [$e|Acc], I+1);
  442. to_name("?"++T, Acc, I) -> to_name(T, [$o,$y|Acc], I+2);
  443. to_name("?"++T, Acc, I) -> to_name(T, [$o,$y|Acc], I+2);
  444. to_name("?"++T, Acc, I) -> to_name(T, [$h,$z|Acc], I+2);
  445. to_name("?"++T, Acc, I) -> to_name(T, [$h,$z|Acc], I+2);
  446. to_name("?"++T, Acc, I) -> to_name(T, [$z|Acc], I+1);
  447. to_name("?"++T, Acc, I) -> to_name(T, [$z|Acc], I+1);
  448. to_name("?"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  449. to_name("?"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  450. to_name("?"++T, Acc, I) -> to_name(T, [$j|Acc], I+1);
  451. to_name("?"++T, Acc, I) -> to_name(T, [$j|Acc], I+1);
  452. to_name("?"++T, Acc, I) -> to_name(T, [$k|Acc], I+1);
  453. to_name("?"++T, Acc, I) -> to_name(T, [$k|Acc], I+1);
  454. to_name("?"++T, Acc, I) -> to_name(T, [$l|Acc], I+1);
  455. to_name("?"++T, Acc, I) -> to_name(T, [$l|Acc], I+1);
  456. to_name("?"++T, Acc, I) -> to_name(T, [$m|Acc], I+1);
  457. to_name("?"++T, Acc, I) -> to_name(T, [$m|Acc], I+1);
  458. to_name("?"++T, Acc, I) -> to_name(T, [$n|Acc], I+1);
  459. to_name("?"++T, Acc, I) -> to_name(T, [$n|Acc], I+1);
  460. to_name("?"++T, Acc, I) -> to_name(T, [$o|Acc], I+1);
  461. to_name("?"++T, Acc, I) -> to_name(T, [$o|Acc], I+1);
  462. to_name("?"++T, Acc, I) -> to_name(T, [$p|Acc], I+1);
  463. to_name("?"++T, Acc, I) -> to_name(T, [$p|Acc], I+1);
  464. to_name("?"++T, Acc, I) -> to_name(T, [$r|Acc], I+1);
  465. to_name("?"++T, Acc, I) -> to_name(T, [$r|Acc], I+1);
  466. to_name("?"++T, Acc, I) -> to_name(T, [$s|Acc], I+1);
  467. to_name("?"++T, Acc, I) -> to_name(T, [$s|Acc], I+1);
  468. to_name("?"++T, Acc, I) -> to_name(T, [$t|Acc], I+1);
  469. to_name("?"++T, Acc, I) -> to_name(T, [$t|Acc], I+1);
  470. to_name("?"++T, Acc, I) -> to_name(T, [$u|Acc], I+1);
  471. to_name("?"++T, Acc, I) -> to_name(T, [$u|Acc], I+1);
  472. to_name("?"++T, Acc, I) -> to_name(T, [$f|Acc], I+1);
  473. to_name("?"++T, Acc, I) -> to_name(T, [$f|Acc], I+1);
  474. to_name("?"++T, Acc, I) -> to_name(T, [$h|Acc], I+1);
  475. to_name("?"++T, Acc, I) -> to_name(T, [$h|Acc], I+1);
  476. to_name("?"++T, Acc, I) -> to_name(T, [$c|Acc], I+1);
  477. to_name("?"++T, Acc, I) -> to_name(T, [$c|Acc], I+1);
  478. to_name("?"++T, Acc, I) -> to_name(T, [$h,$c|Acc], I+2);
  479. to_name("?"++T, Acc, I) -> to_name(T, [$h,$c|Acc], I+2);
  480. to_name("?"++T, Acc, I) -> to_name(T, [$h,$s|Acc], I+2);
  481. to_name("?"++T, Acc, I) -> to_name(T, [$h,$s|Acc], I+2);
  482. to_name("?"++T, Acc, I) -> to_name(T, [$h,$h,$s|Acc], I+3);
  483. to_name("?"++T, Acc, I) -> to_name(T, [$h,$h,$s|Acc], I+3);
  484. to_name("?"++T, Acc, I) -> to_name(T, [$_|Acc], I+1);
  485. to_name("?"++T, Acc, I) -> to_name(T, [$_|Acc], I+1);
  486. to_name("?"++T, Acc, I) -> to_name(T, [$y|Acc], I+1);
  487. to_name("?"++T, Acc, I) -> to_name(T, [$y|Acc], I+1);
  488. to_name("?"++T, Acc, I) -> to_name(T, [$_|Acc], I+1);
  489. to_name("?"++T, Acc, I) -> to_name(T, [$_|Acc], I+1);
  490. to_name("?"++T, Acc, I) -> to_name(T, [$h,$e|Acc], I+2);
  491. to_name("?"++T, Acc, I) -> to_name(T, [$h,$e|Acc], I+2);
  492. to_name("?"++T, Acc, I) -> to_name(T, [$u,$y|Acc], I+2);
  493. to_name("?"++T, Acc, I) -> to_name(T, [$u,$y|Acc], I+2);
  494. to_name("?"++T, Acc, I) -> to_name(T, [$a,$y|Acc], I+2);
  495. to_name("?"++T, Acc, I) -> to_name(T, [$a,$y|Acc], I+2);
  496. % Ukrainian support
  497. to_name("?"++T, Acc, I) -> to_name(T, [$g|Acc], I+1);
  498. to_name("?"++T, Acc, I) -> to_name(T, [$g|Acc], I+1);
  499. to_name("?"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  500. to_name("?"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  501. to_name("?"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  502. to_name("?"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  503. to_name("?"++T, Acc, I) -> to_name(T, [$e,$y|Acc], I+2);
  504. to_name("?"++T, Acc, I) -> to_name(T, [$e,$y|Acc], I+2);
  505. % Polish support
  506. to_name("?"++T, Acc, I) -> to_name(T, [$a|Acc], I+1);
  507. to_name("?"++T, Acc, I) -> to_name(T, [$a|Acc], I+1);
  508. to_name("?"++T, Acc, I) -> to_name(T, [$e|Acc], I+1);
  509. to_name("?"++T, Acc, I) -> to_name(T, [$e|Acc], I+1);
  510. to_name("?"++T, Acc, I) -> to_name(T, [$c|Acc], I+1);
  511. to_name("?"++T, Acc, I) -> to_name(T, [$c|Acc], I+1);
  512. to_name("?"++T, Acc, I) -> to_name(T, [$l|Acc], I+1);
  513. to_name("?"++T, Acc, I) -> to_name(T, [$l|Acc], I+1);
  514. to_name("?"++T, Acc, I) -> to_name(T, [$n|Acc], I+1);
  515. to_name("?"++T, Acc, I) -> to_name(T, [$n|Acc], I+1);
  516. to_name("?"++T, Acc, I) -> to_name(T, [$s|Acc], I+1);
  517. to_name("?"++T, Acc, I) -> to_name(T, [$s|Acc], I+1);
  518. to_name("?"++T, Acc, I) -> to_name(T, [$z|Acc], I+1);
  519. to_name("?"++T, Acc, I) -> to_name(T, [$z|Acc], I+1);
  520. to_name("?"++T, Acc, I) -> to_name(T, [$z|Acc], I+1);
  521. to_name("?"++T, Acc, I) -> to_name(T, [$z|Acc], I+1);
  522. % Turkish support
  523. to_name("?"++T, Acc, I) -> to_name(T, [$s|Acc], I+1);
  524. to_name("?"++T, Acc, I) -> to_name(T, [$s|Acc], I+1);
  525. to_name("?"++T, Acc, I) -> to_name(T, [$g|Acc], I+1);
  526. to_name("?"++T, Acc, I) -> to_name(T, [$g|Acc], I+1);
  527. to_name("?"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  528. to_name("?"++T, Acc, I) -> to_name(T, [$i|Acc], I+1);
  529. % Some entities - we might want to add generic code here, depends
  530. % on where to_name/1 is used (can we assume that the input is always html?)
  531. to_name("&amp;"++T, Acc, I) -> to_name(T, [$_|Acc], I+1);
  532. to_name("&lt;"++T, Acc, I) -> to_name(T, [$_|Acc], I+1);
  533. to_name("&gt;"++T, Acc, I) -> to_name(T, [$_|Acc], I+1);
  534. to_name("&#39;"++T, Acc, I) -> to_name(T, [$_|Acc], I+1);
  535. % Other sequences of characters are mapped to $_
  %% When the previous output character already is an underscore the
  %% character is dropped, so a run of unmapped characters gives one "_".
  536. to_name([_C|T], [$_|_] = Acc, I) ->
  537. to_name(T, Acc, I+1);
  538. to_name([_C|T], Acc, I) ->
  539. to_name(T, [$_|Acc], I+1).
  540. %% @doc Replace a string inside another string
  541. %% Copyright 2008 Rusty Klophaus (Nitrogen, MIT License)
  542. replace([], _, _) -> [];
  543. replace(String, S1, S2) when is_list(String), is_list(S1), is_list(S2) ->
  544. Length = length(S1),
  545. case string:substr(String, 1, Length) of
  546. S1 ->
  547. S2 ++ replace(string:substr(String, Length + 1), S1, S2);
  548. _ ->
  549. [hd(String)|replace(tl(String), S1, S2)]
  550. end.
  551. %% @doc Truncate a string. Append the '...' character at the place of break off.
  552. %% @spec truncate(String, int()) -> String
  553. truncate(L, N) ->
  554. truncate(L, N, "…").
  555. truncate(B, N, Append) when is_binary(B) ->
  556. truncate(z_convert:to_list(B), N, Append);
  557. truncate(_L, N, _Append) when N =< 0 ->
  558. [];
  559. truncate(L, N, Append) ->
  560. truncate(L, N, Append, in_word, [], in_word, []).
  %% truncate/7: worker for truncate/3. Arguments, in order:
  %%   Input     - characters still to be read
  %%   N         - how many more characters may be taken (one per step)
  %%   Append    - text to add when the string is cut
  %%   LastState - kind of the remembered break point (sentence | word | in_word)
  %%   Last      - reversed output as it was at the remembered break point
  %%   AccState  - what the most recent character was (in_word | word | sentence)
  %%   Acc       - reversed output collected so far
  %% Clause order is significant: the N = 0 clauses must come before the
  %% character clauses, and the multi-byte clauses before the catch-all.
  %%
  %% Input used up before the limit: return everything, nothing appended.
  561. truncate([], _, _Append, _LastState, _Last, _AccState, Acc) ->
  562. lists:reverse(Acc);
  %% Limit reached and the remembered break point is marked 'sentence':
  %% cut there and do not add Append.
  563. truncate(_, 0, _Append, sentence, Last, _AccState, _Acc) ->
  564. lists:reverse(Last);
  %% Limit reached but no break point was remembered: cut exactly at the
  %% limit and add Append.
  565. truncate(_, 0, Append, _, [], _AccState, Acc) ->
  566. lists:reverse(insert_acc(Append, Acc));
  %% Limit reached with a remembered break point: cut there and add Append.
  567. truncate(_, 0, Append, _LastState, Last, _AccState, _Acc) ->
  568. lists:reverse(insert_acc(Append, Last));
  %% Sentence terminators. Directly after a word (or a word separator) the
  %% break point is moved to just after the terminator, marked 'sentence'.
  569. truncate([C|Rest], N, Append, LastState, Last, AccState, Acc)
  570. when C == $.; C == $!; C == $? ->
  571. case AccState of
  572. in_word -> truncate(Rest, N-1, Append, sentence, [C|Acc], sentence, [C|Acc]);
  573. word -> truncate(Rest, N-1, Append, sentence, [C|Acc], sentence, [C|Acc]);
  574. _ -> truncate(Rest, N-1, Append, LastState, Last, sentence, [C|Acc])
  575. end;
  %% Semicolon, dash, comma. Directly after a word the break point becomes
  %% the text before the character; it is marked 'sentence', so a cut made
  %% there gets no Append.
  576. truncate([C|Rest], N, Append, LastState, Last, AccState, Acc)
  577. when C == $;; C == $-; C == $, ->
  578. case AccState of
  579. in_word -> truncate(Rest, N-1, Append, sentence, Acc, word, [C|Acc]);
  580. _ -> truncate(Rest, N-1, Append, LastState, Last, word, [C|Acc])
  581. end;
  %% Whitespace, slash, pipe, parentheses, double quote. Directly after a
  %% word the break point becomes the text before the character ('word').
  582. truncate([C|Rest], N, Append, LastState, Last, AccState, Acc)
  583. when C == 32; C == 9; C == 10; C == 13; C == $/; C == $|; C == $(; C == $); C == $" ->
  584. case AccState of
  585. in_word -> truncate(Rest, N-1, Append, word, Acc, word, [C|Acc]);
  586. _ -> truncate(Rest, N-1, Append, LastState, Last, word, [C|Acc])
  587. end;
  %% Ampersand: get_entity/2 copies everything up to and including the next
  %% ';' (or to the end of input), and the whole run counts as one character.
  %% Directly after a word the break point is placed after that run.
  588. truncate([$&|_]=Input, N, Append, LastState, Last, AccState, Acc) ->
  589. {Rest1,Acc1} = get_entity(Input,Acc),
  590. case AccState of
  591. in_word -> truncate(Rest1, N-1, Append, word, Acc1, word, Acc1);
  592. _ -> truncate(Rest1, N-1, Append, LastState, Last, word, Acc1)
  593. end;
  %% The clauses below recognise a UTF-8 lead byte and copy the lead byte
  %% together with its continuation elements in one step, so a multi-byte
  %% sequence counts as a single character and is never cut in half. The
  %% continuation elements themselves are not validated. When fewer elements
  %% remain than the lead byte asks for, the catch-all clause handles it.
  594. %% Overlong encoding: start of a 2-byte sequence, but code point <= 127
  595. truncate([X,A|Rest], N, Append, LastState, Last, _AccState, Acc) when X >= 192, X =< 193 ->
  596. truncate(Rest, N-1, Append, LastState, Last, in_word, [A,X|Acc]);
  597. %% Start of 2-byte sequence
  598. truncate([X,A|Rest], N, Append, LastState, Last, _AccState, Acc) when X >= 194, X =< 223 ->
  599. truncate(Rest, N-1, Append, LastState, Last, in_word, [A,X|Acc]);
  600. %% Start of 3-byte sequence
  601. truncate([X,A,B|Rest], N, Append, LastState, Last, _AccState, Acc) when X >= 224, X =< 239 ->
  602. truncate(Rest, N-1, Append, LastState, Last, in_word, [B,A,X|Acc]);
  603. %% Start of 4-byte sequence
  604. truncate([X,A,B,C|Rest], N, Append, LastState, Last, _AccState, Acc) when X >= 240, X =< 244 ->
  605. truncate(Rest, N-1, Append, LastState, Last, in_word, [C,B,A,X|Acc]);
  606. %% Restricted by RFC 3629: start of 4-byte sequence for codepoint above 10FFFF
  607. truncate([X,A,B,C|Rest], N, Append, LastState, Last, _AccState, Acc) when X >= 245, X =< 247 ->
  608. truncate(Rest, N-1, Append, LastState, Last, in_word, [C,B,A,X|Acc]);
  609. %% Restricted by RFC 3629: start of 5-byte sequence
  610. truncate([X,A,B,C,D|Rest], N, Append, LastState, Last, _AccState, Acc) when X >= 248, X =< 251 ->
  611. truncate(Rest, N-1, Append, LastState, Last, in_word, [D,C,B,A,X|Acc]);
  612. %% Restricted by RFC 3629: start of 6-byte sequence
  613. truncate([X,A,B,C,D,E|Rest], N, Append, LastState, Last, _AccState, Acc) when X >= 252, X =< 253 ->
  614. truncate(Rest, N-1, Append, LastState, Last, in_word, [E,D,C,B,A,X|Acc]);
  615. %% Any other character
  616. truncate([C|Rest], N, Append, LastState, Last, _AccState, Acc) ->
  617. truncate(Rest, N-1, Append, LastState, Last, in_word, [C|Acc]).
  618. insert_acc([], Acc) ->
  619. Acc;
  620. insert_acc([H|T], Acc) ->
  621. insert_acc(T, [H|Acc]).
  622. get_entity([], Acc) ->
  623. {[],Acc};
  624. get_entity([$;|Rest], Acc) ->
  625. {Rest,[$;|Acc]};
  626. get_entity([C|Rest], Acc) ->
  627. get_entity(Rest, [C|Acc]).
  628. truncatewords(S, Words) ->
  629. truncatewords(S, Words, "…").
  %% Truncate S (a binary or a character list) after Words words and add
  %% Append when the word limit is reached. Binaries are converted to a
  %% list first; the result is always a list.
  truncatewords(S, Words, Append) when is_binary(S); is_list(S) ->
      Chars = case is_binary(S) of
                  true -> z_convert:to_list(S);
                  false -> S
              end,
      truncatewords(Chars, in_space, Words, Append, []).
  %% Word-counting state machine behind truncatewords/3.
  %%   State  - in_space while scanning separators, in_word inside a word
  %%   Words  - number of words that may still be completed
  %%   Acc    - the characters seen so far, in reverse order
  %% A word is counted when the separator that ends it is read. Once the
  %% budget reaches 0 the separators at the head of Acc (the tail of the
  %% text collected so far) are trimmed and Append is attached.
  truncatewords(_Text, _State, 0, Append, Acc) ->
      Trimmed = trim_left_func(Acc, fun iswordsep/1),
      lists:reverse(Trimmed, Append);
  truncatewords([], _State, _Words, _Append, Acc) ->
      %% Input exhausted before the limit was hit: nothing is appended.
      lists:reverse(Acc);
  truncatewords([Ch | Tail], in_space, Words, Append, Acc) ->
      NextState = case iswordsep(Ch) of
                      true -> in_space;
                      false -> in_word
                  end,
      truncatewords(Tail, NextState, Words, Append, [Ch | Acc]);
  truncatewords([Ch | Tail], in_word, Words, Append, Acc) ->
      {NextState, Remaining} = case iswordsep(Ch) of
                                   true -> {in_space, Words - 1};
                                   false -> {in_word, Words}
                               end,
      truncatewords(Tail, NextState, Remaining, Append, [Ch | Acc]).
  %% True when C is one of the word separators: space, newline, carriage
  %% return, tab, comma, colon or semicolon.
  iswordsep(C) ->
      lists:member(C, [$\s, $\n, $\r, $\t, $,, $:, $;]).
  %% @doc Split the binary into lines. Line separators can be \r, \n or \r\n.
  %% Returns a list of binaries without the separators. An empty input gives
  %% [<<>>] and a trailing separator gives a trailing <<>> element.
  split_lines(B) when is_binary(B) ->
      %% When several patterns match at the same position the longest one
      %% wins, so a \r\n pair is treated as a single separator.
      binary:split(B, [<<13, 10>>, <<13>>, <<10>>], [global]).
  %% @doc Escape special characters for ical RFC2445 elements.
  %% Accepts an iolist, a binary or an atom and returns a binary.
  %% Newlines (\r\n or \n) become the two characters \n, tabs become a
  %% space, the characters " , ; and \ get a backslash in front of them and
  %% a colon is wrapped in double quotes. After roughly 70 output bytes the
  %% line is folded by inserting CR LF SPACE.
  escape_ical(L) when is_list(L) ->
      escape_ical(iolist_to_binary(L));
  escape_ical(B) when is_binary(B) ->
      escape_ical(B, <<>>, 0);
  escape_ical(A) when is_atom(A) ->
      escape_ical(atom_to_list(A)).

  %% escape_ical(Input, Acc, N): N is the number of bytes written on the
  %% current output line.
  escape_ical(<<>>, Acc, _N) ->
      Acc;
  %% Fold the line, but never directly in front of a UTF-8 continuation
  %% byte (2#10xxxxxx): that would split a multi-byte character over two
  %% lines. The N >= 73 cap keeps lines bounded for malformed input that
  %% consists of long runs of continuation bytes.
  escape_ical(<<C, _/binary>> = B, Acc, N)
          when N >= 70, ((C band 16#C0) =/= 16#80 orelse N >= 73) ->
      escape_ical(B, <<Acc/binary, 13, 10, 32>>, 0);
  escape_ical(<<13, 10, Rest/binary>>, Acc, N) ->
      escape_ical(Rest, <<Acc/binary, $\\, $n>>, N+2);
  escape_ical(<<10, Rest/binary>>, Acc, N) ->
      escape_ical(Rest, <<Acc/binary, $\\, $n>>, N+2);
  escape_ical(<<9, Rest/binary>>, Acc, N) ->
      escape_ical(Rest, <<Acc/binary, 32>>, N+1);
  escape_ical(<<$", Rest/binary>>, Acc, N) ->
      escape_ical(Rest, <<Acc/binary, $\\, $">>, N+2);
  escape_ical(<<$,, Rest/binary>>, Acc, N) ->
      escape_ical(Rest, <<Acc/binary, $\\, $,>>, N+2);
  escape_ical(<<$:, Rest/binary>>, Acc, N) ->
      escape_ical(Rest, <<Acc/binary, $", $:, $">>, N+3);
  escape_ical(<<$;, Rest/binary>>, Acc, N) ->
      escape_ical(Rest, <<Acc/binary, $\\, $;>>, N+2);
  escape_ical(<<$\\, Rest/binary>>, Acc, N) ->
      escape_ical(Rest, <<Acc/binary, $\\, $\\>>, N+2);
  escape_ical(<<C, Rest/binary>>, Acc, N) ->
      escape_ical(Rest, <<Acc/binary, C>>, N+1).
  %% @doc Return true if Start is a prefix of Word
  %% Both arguments may be binaries or iolists; iolists are flattened to a
  %% binary before the comparison.
  %% @spec starts_with(String, String) -> bool()
  starts_with(Start, B) when is_binary(Start), is_binary(B) ->
      PrefixLen = byte_size(Start),
      byte_size(B) >= PrefixLen
          andalso binary:part(B, 0, PrefixLen) =:= Start;
  starts_with(Start, String) ->
      starts_with(iolist_to_binary(Start), iolist_to_binary(String)).
  %% @doc Return true iff Word ends with End
  %% Both arguments may be binaries or iolists; iolists are flattened to a
  %% binary before the comparison.
  %% @spec ends_with(String, String) -> bool()
  ends_with(End, B) when is_binary(End), is_binary(B) ->
      SuffixLen = byte_size(End),
      Skip = byte_size(B) - SuffixLen,
      %% A negative Skip means End is longer than B, so it cannot be a suffix.
      Skip >= 0 andalso binary:part(B, Skip, SuffixLen) =:= End;
  ends_with(End, String) ->
      ends_with(iolist_to_binary(End), iolist_to_binary(String)).
  %% @doc Return true iff What is found in the string
  %% Both arguments may be binaries or iolists; iolists are flattened to a
  %% binary first. An empty What is found in every string.
  %% @spec contains(String, String) -> bool()
  contains(<<>>, B) when is_binary(B) ->
      %% binary:match/2 rejects an empty pattern with badarg, so the
      %% "empty needle always matches" case is answered here.
      true;
  contains(What, B) when is_binary(What), is_binary(B) ->
      binary:match(B, What) =/= nomatch;
  contains(What, String) ->
      contains(iolist_to_binary(What), iolist_to_binary(String)).
  %% @doc Split a string, see http://www.erlang.org/pipermail/erlang-questions/2008-October/038896.html
  %% The separator is a character list. An empty separator splits between
  %% every pair of adjacent characters, a single character separator and a
  %% longer separator are each handled by their own helper.
  %% @spec split(String, String) -> list()
  split(Text, []) ->
      split0(Text);
  split(Text, [SepChar]) when is_integer(SepChar) ->
      split1(Text, SepChar);
  split(Text, [First, Second | SepRest]) when is_integer(First), is_integer(Second) ->
      split2(Text, First, Second, SepRest).
  %% Split a string at the empty separator "". The empty separator is
  %% deemed to occur between adjacent characters only, not at the start or
  %% the end, so every character becomes its own one-element string and
  %% the empty string yields [].
  split0(String) ->
      [[C] || C <- String].
  %% Split a string at a single character separator.
  %% Always returns at least one field: an empty input gives [""], and a
  %% leading or trailing separator produces an empty first or last field.
  split1(String, Sep) ->
      case lists:splitwith(fun(Ch) -> Ch =/= Sep end, String) of
          {Field, []} ->
              %% No separator left: this is the final field.
              [Field];
          {Field, [_Sep | Tail]} ->
              [Field | split1(Tail, Sep)]
      end.
  %% Split a string at a multi-character separator [C1,C2|L].
  %% The first two separator characters are passed separately so the
  %% loop can test for them with a plain pattern match.
  split2(String, C1, C2, L) ->
      split2_loop(String, C1, C2, L, []).

  %% Rev holds the characters of the current field in reverse order.
  split2_loop([], _C1, _C2, _L, Rev) ->
      [lists:reverse(Rev)];
  split2_loop([C1 | [C2 | After] = Tail], C1, C2, L, Rev) ->
      %% The first two separator characters matched; check the remainder.
      case split_prefix(L, After) of
          no ->
              %% Not a full separator: keep C1 and rescan from C2.
              split2_loop(Tail, C1, C2, L, [C1 | Rev]);
          Rest ->
              [lists:reverse(Rev) | split2(Rest, C1, C2, L)]
      end;
  split2_loop([Chr | Tail], C1, C2, L, Rev) ->
      split2_loop(Tail, C1, C2, L, [Chr | Rev]).

  %% Strip Prefix from the front of Str. Returns what follows the prefix,
  %% or the atom no when Str does not start with Prefix.
  split_prefix(Prefix, Str) ->
      case lists:prefix(Prefix, Str) of
          true -> lists:nthtail(length(Prefix), Str);
          false -> no
      end.
  %% Minimal self-test. Every step is an assertive match, so a failing
  %% check crashes with badmatch; returns ok when all checks pass.
  test() ->
      %% The expected to_name/1 result "ucgen" implies the input "üçgen";
      %% the literal had been mis-decoded as Cyrillic characters.
      A = "üçgen",
      A = to_lower(to_upper(A)),
      "ucgen" = to_name(A),
      "a" = first_char("aap"),
      %% NOTE(review): the "?" characters below look like placeholders for
      %% characters lost in transcoding - confirm the intended input.
      "?" = first_char("???xx"),
      "?" = first_char("?aap"),
      ok.