/bianca/src/main/java/com/clevercloud/bianca/lib/string/StringModule.java
Java | 6494 lines | 4926 code | 974 blank | 594 comment | 1090 complexity | f89a3a083427accd155fc2116ebd73b1 MD5 | raw file
Possible License(s): GPL-2.0, MPL-2.0-no-copyleft-exception
Large files are truncated, but you can click here to view the full file
- /*
- * Copyright (c) 1998-2010 Caucho Technology -- all rights reserved
- * Copyright (c) 2011-2012 Clever Cloud SAS -- all rights reserved
- *
- * This file is part of Bianca(R) Open Source
- *
- * Each copy or derived work must preserve the copyright notice and this
- * notice unmodified.
- *
- * Bianca Open Source is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Bianca Open Source is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
- * of NON-INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Bianca Open Source; if not, write to the
- *
- * Free Software Foundation, Inc.
- * 59 Temple Place, Suite 330
- * Boston, MA 02111-1307 USA
- *
- * @author Scott Ferguson
- * @author Marc-Antoine Perennou <Marc-Antoine@Perennou.com>
- */
- package com.clevercloud.bianca.lib.string;
- import com.clevercloud.bianca.BiancaException;
- import com.clevercloud.bianca.BiancaModuleException;
- import com.clevercloud.bianca.annotation.*;
- import com.clevercloud.bianca.env.*;
- import com.clevercloud.bianca.lib.file.BinaryOutput;
- import com.clevercloud.bianca.lib.file.FileModule;
- import com.clevercloud.bianca.module.AbstractBiancaModule;
- import com.clevercloud.util.*;
- import com.clevercloud.vfs.Path;
- import java.io.IOException;
- import java.io.InputStream;
- import java.math.BigInteger;
- import java.security.MessageDigest;
- import java.text.DecimalFormat;
- import java.text.DecimalFormatSymbols;
- import java.text.NumberFormat;
- import java.util.*;
- import java.util.logging.Level;
- import java.util.logging.Logger;
- /**
- * PHP functions implemented from the string module
- */
- public class StringModule extends AbstractBiancaModule {
- private static final Logger log = // used by md5_file to log I/O failures at Level.FINE
- Logger.getLogger(StringModule.class.getName());
- private static final L10N L = new L10N(StringModule.class); // message helper; warning/exception text in this class goes through L.l(...)
- public static final int CODESET = 14; // presumably an nl_langinfo item id -- TODO confirm
- public static final int CRYPT_SALT_LENGTH = 2;
- public static final int CRYPT_STD_DES = 0; // NOTE: all four CRYPT_* algorithm flags are 0 in this file
- public static final int CRYPT_EXT_DES = 0;
- public static final int CRYPT_MD5 = 0;
- public static final int CRYPT_BLOWFISH = 0;
- public static final int CHAR_MAX = 1;
- public static final int LC_CTYPE = 1; // LC_*: presumably setlocale category ids -- TODO confirm against the setlocale implementation
- public static final int LC_NUMERIC = 2;
- public static final int LC_TIME = 3;
- public static final int LC_COLLATE = 4;
- public static final int LC_MONETARY = 5;
- public static final int LC_ALL = 6;
- public static final int LC_MESSAGES = 7;
- public static final int NOEXPR = (10 << 15) + 1; // == YESEXPR + 1
- public static final int STR_PAD_LEFT = 1; // STR_PAD_*: presumably padding modes for str_pad (not visible in this chunk)
- public static final int STR_PAD_RIGHT = 0;
- public static final int STR_PAD_BOTH = 2;
- public static final int YESEXPR = 10 << 15;
- public static final int RADIXCHAR = 0x10000;
- public static final int DECIMAL_POINT = RADIXCHAR; // Returns same value as RADIXCHAR
- public static final int THOUSEP = 0x10001;
- public static final int THOUSANDS_SEP = THOUSEP; // Returns same value as THOUSEP
- public static final int ABDAY_1 = 0x20000; // 0x200xx block: consecutive ids, presumably nl_langinfo items; the nl_langinfo visible in this chunk does not consult any of them
- public static final int ABDAY_2 = 0x20001;
- public static final int ABDAY_3 = 0x20002;
- public static final int ABDAY_4 = 0x20003;
- public static final int ABDAY_5 = 0x20004;
- public static final int ABDAY_6 = 0x20005;
- public static final int ABDAY_7 = 0x20006;
- public static final int DAY_1 = 0x20007;
- public static final int DAY_2 = 0x20008;
- public static final int DAY_3 = 0x20009;
- public static final int DAY_4 = 0x2000a;
- public static final int DAY_5 = 0x2000b;
- public static final int DAY_6 = 0x2000c;
- public static final int DAY_7 = 0x2000d;
- public static final int ABMON_1 = 0x2000e;
- public static final int ABMON_2 = 0x2000f;
- public static final int ABMON_3 = 0x20010;
- public static final int ABMON_4 = 0x20011;
- public static final int ABMON_5 = 0x20012;
- public static final int ABMON_6 = 0x20013;
- public static final int ABMON_7 = 0x20014;
- public static final int ABMON_8 = 0x20015;
- public static final int ABMON_9 = 0x20016;
- public static final int ABMON_10 = 0x20017;
- public static final int ABMON_11 = 0x20018;
- public static final int ABMON_12 = 0x20019;
- public static final int MON_1 = 0x2001a;
- public static final int MON_2 = 0x2001b;
- public static final int MON_3 = 0x2001c;
- public static final int MON_4 = 0x2001d;
- public static final int MON_5 = 0x2001e;
- public static final int MON_6 = 0x2001f;
- public static final int MON_7 = 0x20020;
- public static final int MON_8 = 0x20021;
- public static final int MON_9 = 0x20022;
- public static final int MON_10 = 0x20023;
- public static final int MON_11 = 0x20024;
- public static final int MON_12 = 0x20025;
- public static final int AM_STR = 0x20026;
- public static final int PM_STR = 0x20027;
- public static final int D_T_FMT = 0x20028;
- public static final int D_FMT = 0x20029;
- public static final int T_FMT = 0x2002a;
- public static final int T_FMT_AMPM = 0x2002b;
- public static final int ERA = 0x2002c;
- public static final int ERA_D_FMT = 0x2002e; // 0x2002d is not assigned to any constant here
- public static final int ERA_D_T_FMT = 0x20030; // 0x2002f is not assigned to any constant here
- public static final int ERA_T_FMT = 0x20031;
- public static final int CRNCYSTR = 0x4000f;
- private static final DecimalFormatSymbols DEFAULT_DECIMAL_FORMAT_SYMBOLS; // blank final: assigned outside this chunk; read by number_format when point is '.' and group is ','
- private static final BigInteger BIG_TEN = new BigInteger("10"); // not referenced in the visible part of the file
- private static final BigInteger BIG_2_64 = BigInteger.ONE.shiftLeft(64); // 2^64; not referenced in the visible part of the file
- private static final FreeList<MessageDigest> _md5FreeList = new FreeList<MessageDigest>(16); // MD5 digest instances reused by md5(); 16 is presumably the pool capacity -- TODO confirm
- private static final int LEVENSHTEIN_MAX_LENGTH = 255; // levenshtein() yields -1 plus a warning when either input is longer than this
- /**
- * Escapes a string using C syntax.
- *
- * @param source the source string to convert
- * @param characters the set of characters to convert
- * @return the escaped string
- * @see #stripcslashes
- */
- public static StringValue addcslashes(
- Env env, StringValue source, String characters) {
- if (characters == null) {
- characters = "";
- }
- boolean[] bitmap = parseCharsetBitmap(env, characters);
- int length = source.length();
- StringValue sb = new StringValue();
- for (int i = 0; i < length; i++) {
- char ch = source.charAt(i);
- if (ch >= 256 || !bitmap[ch]) {
- sb.append(ch);
- continue;
- }
- switch (ch) {
- case 0x07:
- sb.append("\\a");
- break;
- case '\b':
- sb.append("\\b");
- break;
- case '\t':
- sb.append("\\t");
- break;
- case '\n':
- sb.append("\\n");
- break;
- case 0xb:
- sb.append("\\v");
- break;
- case '\f':
- sb.append("\\f");
- break;
- case '\r':
- sb.append("\\r");
- break;
- default:
- if (ch < 0x20 || ch >= 0x7f) {
- // save as octal
- sb.append("\\");
- sb.append((char) ('0' + ((ch >> 6) & 7)));
- sb.append((char) ('0' + ((ch >> 3) & 7)));
- sb.append((char) ('0' + ((ch) & 7)));
- break;
- } else {
- sb.append("\\");
- sb.append(ch);
- break;
- }
- }
- }
- return sb;
- }
- /**
- * Parses the cslashes bitmap returning an actual bitmap.
- *
- * @param charset the bitmap string
- * @return the actual bitmap
- */
- private static boolean[] parseCharsetBitmap(Env env, String charset) {
- boolean[] bitmap = new boolean[256];
- int length = charset.length();
- for (int i = 0; i < length; i++) {
- char ch = charset.charAt(i);
- // TODO: the bitmap eventual might need to deal with unicode
- if (ch >= 256) {
- continue;
- }
- bitmap[ch] = true;
- if (length <= i + 3) {
- continue;
- }
- if (charset.charAt(i + 1) != '.' || charset.charAt(i + 2) != '.') {
- continue;
- }
- char last = charset.charAt(i + 3);
- if (last < ch) {
- env.warning(L.l("character set range is invalid: {0}..{1}",
- ch, last));
- continue;
- }
- i += 3;
- for (; ch <= last; ch++) {
- bitmap[ch] = true;
- }
- // TODO: handling of '@'?
- }
- return bitmap;
- }
- /**
- * Escapes a string for db characters.
- *
- * @param source the source string to convert
- * @return the escaped string
- */
- public static StringValue addslashes(StringValue source) {
- StringValue sb = new StringValue();
- int length = source.length();
- for (int i = 0; i < length; i++) {
- char ch = source.charAt(i);
- switch (ch) {
- case 0x0:
- sb.append("\\0");
- break;
- case '\'':
- sb.append("\\'");
- break;
- case '\"':
- sb.append("\\\"");
- break;
- case '\\':
- sb.append("\\\\");
- break;
- default:
- sb.append(ch);
- break;
- }
- }
- return sb;
- }
- /**
- * Converts a binary value to a hex value.
- */
- public static StringValue bin2hex(Env env, InputStream is) {
- try {
- StringValue sb = new StringValue();
- int ch;
- while ((ch = is.read()) >= 0) {
- int d = (ch >> 4) & 0xf;
- if (d < 10) {
- sb.append((char) (d + '0'));
- } else {
- sb.append((char) (d + 'a' - 10));
- }
- d = (ch) & 0xf;
- if (d < 10) {
- sb.append((char) (d + '0'));
- } else {
- sb.append((char) (d + 'a' - 10));
- }
- }
- return sb;
- } catch (IOException e) {
- throw new BiancaModuleException(e);
- }
- }
- /**
- * Alias of rtrim. Removes trailing whitespace.
- *
- * @param env the bianca environment
- * @param str the string to be trimmed
- * @param charset optional set of characters to trim
- * @return the trimmed string
- */
- public static StringValue chop(Env env,
- StringValue str,
- @Optional String charset) {
- return rtrim(env, str, charset);
- }
- /**
- * converts a number to its character equivalent
- *
- * @param value the integer value
- * @return the string equivalent
- */
- public static StringValue chr(Env env, long value) {
- StringValue sb = new StringValue();
- sb.append((char) value);
- return sb;
- }
- /**
- * Splits a string into chunks
- *
- * @param body the body string
- * @param chunkLen the optional chunk length, defaults to 76
- * @param end the optional end value, defaults to "\r\n"
- */
- public static String chunk_split(String body,
- @Optional("76") int chunkLen,
- @Optional("\"\\r\\n\"") String end) {
- if (body == null) {
- body = "";
- }
- if (end == null) {
- end = "";
- }
- if (chunkLen < 1) // TODO: real exn
- {
- throw new IllegalArgumentException(L.l("bad value {0}", chunkLen));
- }
- StringBuilder sb = new StringBuilder();
- int i = 0;
- for (; i + chunkLen <= body.length(); i += chunkLen) {
- sb.append(body.substring(i, i + chunkLen));
- sb.append(end);
- }
- if (i < body.length()) {
- sb.append(body.substring(i));
- sb.append(end);
- }
- return sb.toString();
- }
- /**
- * Converts from one cyrillic set to another.
- * <p/>
- * This implementation does nothing, because bianca stores strings as
- * 16 bit unicode.
- */
- public static String convert_cyr_string(Env env,
- String str,
- String from,
- String to) {
- env.stub("convert_cyr_string");
- return str;
- }
- public static Value convert_uudecode(Env env, StringValue source) {
- int length = source.length();
- if (length == 0) {
- return BooleanValue.FALSE;
- }
- StringBuilder builder = new StringBuilder();
- int i = 0;
- while (i < length) {
- int ch1 = source.charAt(i++);
- if (ch1 == 0x60 || ch1 == 0x20) {
- break;
- } else if (ch1 < 0x20 || 0x5f < ch1) {
- continue;
- }
- int sublen = ch1 - 0x20;
- while (sublen > 0) {
- int code;
- code = ((source.charAt(i++) - 0x20) & 0x3f) << 18;
- code += ((source.charAt(i++) - 0x20) & 0x3f) << 12;
- code += ((source.charAt(i++) - 0x20) & 0x3f) << 6;
- code += ((source.charAt(i++) - 0x20) & 0x3f);
- builder.append(code >> 16);
- if (sublen > 1) {
- builder.append(code >> 8);
- }
- if (sublen > 2) {
- builder.append(code);
- }
- sublen -= 3;
- }
- }
- return env.createString(builder.toString());
- }
- /**
- * uuencode a string.
- */
- public static Value convert_uuencode(StringValue source) {
- if (source.length() == 0) {
- return BooleanValue.FALSE;
- }
- StringValue result = new StringValue();
- int i = 0;
- int length = source.length();
- while (i < length) {
- int sublen = length - i;
- if (45 < sublen) {
- sublen = 45;
- }
- result.append((char) (sublen + 0x20));
- int end = i + sublen;
- while (i < end) {
- int code = source.charAt(i++) << 16;
- if (i < length) {
- code += source.charAt(i++) << 8;
- }
- if (i < length) {
- code += source.charAt(i++);
- }
- result.append(toUUChar(((code >> 18) & 0x3f)));
- result.append(toUUChar(((code >> 12) & 0x3f)));
- result.append(toUUChar(((code >> 6) & 0x3f)));
- result.append(toUUChar(((code) & 0x3f)));
- }
- result.append('\n');
- }
- result.append((char) 0x60);
- result.append('\n');
- return result;
- }
- /**
- * Returns an array of information about the characters.
- */
- public static Value count_chars(StringValue data,
- @Optional("0") int mode) {
- int[] count = new int[256];
- int length = data.length();
- for (int i = 0; i < length; i++) {
- count[data.charAt(i) & 0xff] += 1;
- }
- switch (mode) {
- case 0: {
- ArrayValue result = new ArrayValueImpl();
- for (int i = 0; i < count.length; i++) {
- result.put(LongValue.create(i), LongValue.create(count[i]));
- }
- return result;
- }
- case 1: {
- ArrayValue result = new ArrayValueImpl();
- for (int i = 0; i < count.length; i++) {
- if (count[i] > 0) {
- result.put(LongValue.create(i), LongValue.create(count[i]));
- }
- }
- return result;
- }
- case 2: {
- ArrayValue result = new ArrayValueImpl();
- for (int i = 0; i < count.length; i++) {
- if (count[i] == 0) {
- result.put(LongValue.create(i), LongValue.create(count[i]));
- }
- }
- return result;
- }
- case 3: {
- StringValue sb = new StringValue();
- for (int i = 0; i < count.length; i++) {
- if (count[i] > 0) {
- sb.append((char) i);
- }
- }
- return sb;
- }
- case 4: {
- StringValue sb = new StringValue();
- for (int i = 0; i < count.length; i++) {
- if (count[i] == 0) {
- sb.append((char) i);
- }
- }
- return sb;
- }
- default:
- return BooleanValue.FALSE;
- }
- }
- /**
- * Calculates the crc32 value for a string
- *
- * @param str the string value
- * @return the crc32 hash
- */
- public static long crc32(StringValue str) {
- return str.getCrc32Value();
- }
- public static String crypt(String string, @Optional String salt) {
- if (string == null) {
- string = "";
- }
- if (salt == null || salt.equals("")) {
- salt = ("" + Crypt.resultToChar(RandomUtil.nextInt(0x40))
- + Crypt.resultToChar(RandomUtil.nextInt(0x40)));
- }
- return Crypt.crypt(string, salt);
- }
- // TODO: echo
- /**
- * Explodes a string into an array
- *
- * @param separator the separator string
- * @param string the string to be exploded
- * @param limit the max number of elements
- * @return an array of exploded values
- */
- public static Value explode(Env env,
- StringValue separator,
- StringValue string,
- @Optional("0x7fffffff") long limit) {
- if (separator.length() == 0) {
- env.warning(L.l("Delimiter is empty"));
- return BooleanValue.FALSE;
- }
- int head = 0;
- ArrayValue array = new ArrayValueImpl();
- int separatorLength = separator.length();
- int stringLength = string.length();
- long ulimit;
- if (limit >= 0) {
- ulimit = limit;
- } else {
- ulimit = 0x7fffffff;
- }
- for (int i = 0; i < stringLength; ++i) {
- if (ulimit <= array.getSize() + 1) {
- break;
- }
- if (string.regionMatches(i, separator.toString(), 0)) {
- StringValue chunk = string.substring(head, i);
- array.append(chunk);
- head = i + separatorLength;
- i = head - 1;
- }
- }
- StringValue chunk = string.substring(head);
- array.append(chunk);
- while (array.getSize() > 0 && limit++ < 0) {
- array.pop(env);
- }
- return array;
- }
- /**
- * Use printf style formatting to write a string to a file.
- *
- * @param fd the file to write to
- * @param format the format string
- * @param args the valujes to apply to the format string
- */
- public static Value fprintf(Env env,
- @NotNull BinaryOutput os,
- StringValue format,
- Value[] args) {
- Value value = sprintf(env, format, args);
- return FileModule.fwrite(env, os, value.toInputStream(),
- Integer.MAX_VALUE);
- }
- /**
- * implodes an array into a string
- *
- * @param glueV the separator string
- * @param piecesV the array to be imploded
- * @return a string of imploded values
- */
- public static Value implode(Env env,
- Value glueV,
- @Optional Value piecesV) {
- StringValue glue;
- ArrayValue pieces;
- if ((piecesV.isArray() && glueV.isArray())
- || glueV.isArray()) {
- pieces = glueV.toArrayValue(env);
- glue = piecesV.toStringValue();
- } else if (piecesV.isArray()) {
- pieces = piecesV.toArrayValue(env);
- glue = glueV.toStringValue();
- } else {
- env.warning(L.l("neither argument to implode is an array: {0}, {1}",
- glueV.getClass().getName(), piecesV.getClass().getName()));
- return NullValue.NULL;
- }
- StringValue sb = new StringValue();
- boolean isFirst = true;
- Iterator<Value> iter = pieces.getValueIterator(env);
- while (iter.hasNext()) {
- if (!isFirst) {
- sb = sb.append(glue);
- }
- isFirst = false;
- sb = sb.append(iter.next());
- }
- return sb;
- }
- /**
- * implodes an array into a string
- *
- * @param glueV the separator string
- * @param piecesV the array to be imploded
- * @return a string of imploded values
- */
- public static Value join(Env env,
- Value glueV,
- Value piecesV) {
- return implode(env, glueV, piecesV);
- }
- /**
- * Lowercases the first character
- *
- * @param string the input string
- */
- public static StringValue lcfirst(Env env, StringValue string) {
- if (string == null) {
- return StringValue.EMPTY;
- } else if (string.length() == 0) {
- return string;
- }
- StringValue sb = new StringValue();
- sb = sb.append(Character.toLowerCase(string.charAt(0)));
- sb = sb.append(string, 1, string.length());
- return sb;
- }
- /**
- * Calculate Levenshtein distance between two strings
- *
- * @param str1 first string
- * @param str2 second string
- * @param cost_ins defines the cost of insertion
- * @param cost_rep defines the cost of replacement
- * @param cost_del defines the cost of deletion
- * @return int Levenshtein-Distance between the two argument strings
- */
- public static int levenshtein(Env env, String str1, String str2, @Optional("1") int cost_ins, @Optional("1") int cost_rep, @Optional("1") int cost_del) {
- int distance = -1;
- int i1, i2, c0, c1, c2, l1, l2;
- l1 = str1.length();
- l2 = str2.length();
- int[] p1 = new int[l2 + 1];
- int[] p2 = new int[l2 + 1];
- int[] tmp = new int[l2 + 1];
- if (l1 == 0) {
- return l2 * cost_ins;
- }
- if (l2 == 0) {
- return l1 * cost_del;
- }
- // TODO: keep this limitation ? really ?
- if ((l1 > LEVENSHTEIN_MAX_LENGTH) || (l2 > LEVENSHTEIN_MAX_LENGTH)) {
- distance = -1;
- } else {
- for (i2 = 0; i2 <= l2; i2++) {
- p1[i2] = i2 * cost_ins;
- }
- for (i1 = 0; i1 < l1; i1++) {
- p2[0] = p1[0] + cost_del;
- for (i2 = 0; i2 < l2; i2++) {
- c0 = p1[i2] + ((str1.charAt(i1) == str2.charAt(i2)) ? 0 : cost_rep);
- c1 = p1[i2 + 1] + cost_del;
- if (c1 < c0) {
- c0 = c1;
- }
- c2 = p2[i2] + cost_ins;
- if (c2 < c0) {
- c0 = c2;
- }
- p2[i2 + 1] = c0;
- }
- tmp = p1;
- p1 = p2;
- p2 = tmp;
- }
- c0 = p1[l2];
- distance = c0;
- }
- if (distance < 0) {
- env.warning(L.l("Argument string(s) too long"));
- }
- return distance;
- }
- /**
- * Gets locale-specific symbols.
- * XXX: locale charset
- */
- public static ArrayValue localeconv(Env env) {
- ArrayValueImpl array = new ArrayValueImpl();
- BiancaLocale money = env.getLocaleInfo().getMonetary();
- Locale locale = money.getLocale();
- DecimalFormatSymbols decimal = new DecimalFormatSymbols(locale);
- Currency currency = NumberFormat.getInstance(locale).getCurrency();
- array.put(env.createString("decimal_point"),
- new StringValue(decimal.getDecimalSeparator()));
- array.put(env.createString("thousands_sep"),
- new StringValue(decimal.getGroupingSeparator()));
- //array.put("grouping", "");
- array.put(env.createString("int_curr_symbol"),
- new StringValue(decimal.getInternationalCurrencySymbol()));
- array.put(env.createString("currency_symbol"),
- new StringValue(decimal.getCurrencySymbol()));
- array.put(env.createString("mon_decimal_point"),
- new StringValue(decimal.getMonetaryDecimalSeparator()));
- array.put(env.createString("mon_thousands_sep"),
- new StringValue(decimal.getGroupingSeparator()));
- //array.put("mon_grouping", "");
- array.put(new StringValue("positive_sign"), StringValue.EMPTY);
- array.put(env.createString("negative_sign"),
- new StringValue(decimal.getMinusSign()));
- array.put(env.createString("int_frac_digits"),
- LongValue.create(currency.getDefaultFractionDigits()));
- array.put(env.createString("frac_digits"),
- LongValue.create(currency.getDefaultFractionDigits()));
- //array.put("p_cs_precedes", "");
- //array.put("p_sep_by_space", "");
- //array.put("n_cs_precedes", "");
- //array.put("n_sep_by_space", "");
- //array.put("p_sign_posn", "");
- //array.put("n_sign_posn", "");
- return array;
- }
- /**
- * Removes leading whitespace.
- *
- * @param string the string to be trimmed
- * @param characters optional set of characters to trim
- * @return the trimmed string
- */
- public static StringValue ltrim(Env env,
- StringValue string,
- @Optional String characters) {
- if (characters == null) {
- characters = "";
- }
- boolean[] trim;
- if (characters.equals("")) {
- trim = TRIM_WHITESPACE;
- } else {
- trim = parseCharsetBitmap(env, characters);
- }
- for (int i = 0; i < string.length(); i++) {
- char ch = string.charAt(i);
- if (ch >= 256 || !trim[ch]) {
- if (i == 0) {
- return string;
- } else {
- return string.substring(i);
- }
- }
- }
- return StringValue.EMPTY;
- }
- /**
- * returns the md5 hash
- *
- * @param source the string
- * @param rawOutput if true, return the raw binary
- * @return a string of imploded values
- */
- public static Value md5(Env env,
- InputStream is,
- @Optional boolean rawOutput) {
- try {
- MessageDigest md = _md5FreeList.allocate();
- if (md == null) {
- md = MessageDigest.getInstance("MD5");
- }
- md.reset();
- int ch;
- while ((ch = is.read()) >= 0) {
- md.update((byte) ch);
- }
- byte[] digest = md.digest();
- _md5FreeList.free(md);
- return hashToValue(digest, rawOutput);
- } catch (Exception e) {
- throw new BiancaModuleException(e);
- }
- }
- /**
- * returns the md5 hash
- *
- * @param source the string
- * @param rawOutput if true, return the raw binary
- * @return a string of imploded values
- */
- public static Value md5_file(Env env,
- Path source,
- @Optional boolean rawOutput) {
- try {
- MessageDigest md = MessageDigest.getInstance("MD5");
- InputStream is = null;
- try {
- is = source.openRead();
- int d;
- while ((d = is.read()) >= 0) {
- md.update((byte) d);
- }
- byte[] digest = md.digest();
- return hashToValue(digest, rawOutput);
- } catch (IOException e) {
- log.log(Level.FINE, e.toString(), e);
- return BooleanValue.FALSE;
- } finally {
- try {
- if (is != null) {
- is.close();
- }
- } catch (IOException e) {
- }
- }
- } catch (Exception e) {
- throw new BiancaModuleException(e);
- }
- }
- /**
- * Returns the metaphone of a string.
- * This implementation produces identical results to the php version,
- * which does contain some bugs.
- * <p/>
- * Processing happens in two phases.  First, everything up to the first
- * letter A-Z (after upper-casing) is skipped and that first letter is
- * handled by a dedicated switch.  Second, the remaining characters are
- * scanned left to right; each letter is translated by looking at the raw
- * neighbouring input characters (previous, next, next-next), not at the
- * codes emitted so far.  Non-letters are ignored and vowels after the
- * first letter produce no output.
- * <p/>
- * Upper-casing is done by the toUpperCase helper, which is defined outside
- * this part of the file.
- *
- * @param string the input, null is treated as the empty string
- * @return the metaphone key, "" when the input contains no letter
- */
- public static String metaphone(String string) {
- if (string == null) {
- string = "";
- }
- int length = string.length();
- int index = 0;
- char ch = 0;
- // ignore everything up until first letter
- for (; index < length; index++) {
- ch = toUpperCase(string.charAt(index));
- if ('A' <= ch && ch <= 'Z') {
- break;
- }
- }
- if (index == length) { // no letter at all
- return "";
- }
- int lastIndex = length - 1;
- StringBuilder result = new StringBuilder(length);
- // special case first letter
- char nextCh = index < lastIndex
- ? toUpperCase(string.charAt(index + 1))
- : 0; // 0 stands for "no such character"
- switch (ch) {
- case 'A':
- if (nextCh == 'E') { // leading AE -> E
- result.append('E');
- index += 2;
- } else {
- result.append('A');
- index += 1;
- }
- break;
- case 'E':
- case 'I':
- case 'O':
- case 'U':
- result.append(ch); // a leading vowel is kept as is
- index += 1;
- break;
- case 'G':
- case 'K':
- case 'P':
- if (nextCh == 'N') { // leading GN/KN/PN -> N; otherwise index is left alone and the main loop handles the letter
- result.append('N');
- index += 2;
- }
- break;
- case 'W':
- if (nextCh == 'H' || nextCh == 'R') { // NOTE(review): appends nextCh, so a leading "WH" emits 'H'; PHP is believed to emit 'W' here -- confirm
- result.append(nextCh);
- index += 2;
- } else {
- switch (nextCh) {
- case 'A':
- case 'E':
- case 'I':
- case 'O':
- case 'U':
- result.append('W'); // W + vowel -> W, the vowel is consumed too
- index += 2;
- break;
- default:
- break;
- }
- }
- break;
- case 'X':
- result.append('S'); // leading X -> S
- index += 1;
- break;
- default:
- break;
- }
- // the rest of the letters
- char prevCh;
- for (; index < length; index++) {
- if (index > 0) {
- prevCh = toUpperCase(string.charAt(index - 1)); // raw previous input character, even if it was skipped or a non-letter
- } else {
- prevCh = 0;
- }
- ch = toUpperCase(string.charAt(index));
- if (ch < 'A' || ch > 'Z') { // non-letters produce nothing
- continue;
- }
- if (ch == prevCh && ch != 'C') { // doubled letters collapse, except CC
- continue;
- }
- if (index + 1 < length) {
- nextCh = toUpperCase(string.charAt(index + 1));
- } else {
- nextCh = 0;
- }
- char nextnextCh;
- if (index + 2 < length) {
- nextnextCh = toUpperCase(string.charAt(index + 2));
- } else {
- nextnextCh = 0;
- }
- switch (ch) {
- case 'B':
- if (prevCh != 'M') { // B directly after M is dropped
- result.append('B');
- }
- break;
- case 'C':
- switch (nextCh) {
- case 'E':
- case 'I':
- case 'Y':
- // makesoft
- if (nextCh == 'I' && nextnextCh == 'A') { // CIA -> X
- result.append('X');
- } else if (prevCh == 'S') { // SCE/SCI/SCY: the C emits nothing
- } else {
- result.append('S');
- }
- break;
- default:
- if (nextCh == 'H') { // CH -> X, the H is consumed
- result.append('X');
- index++;
- } else {
- result.append('K');
- }
- break;
- }
- break;
- case 'D':
- if (nextCh == 'G') {
- switch (nextnextCh) {
- case 'E':
- case 'I':
- case 'Y':
- // makesoft
- result.append('J'); // DGE/DGI/DGY -> J, the G is consumed
- index++;
- break;
- default:
- result.append('T');
- break;
- }
- } else {
- result.append('T');
- }
- break;
- case 'G':
- if (nextCh == 'H') {
- boolean isSilent = false;
- if (index - 3 >= 0) {
- char prev3Ch = toUpperCase(string.charAt(index - 3));
- switch (prev3Ch) {
- // noghtof
- case 'B':
- case 'D':
- case 'H':
- isSilent = true;
- break;
- default:
- break;
- }
- }
- if (!isSilent) {
- if (index - 4 >= 0) {
- char prev4Ch = toUpperCase(string.charAt(index - 4));
- isSilent = (prev4Ch == 'H');
- }
- }
- if (!isSilent) { // GH -> F unless silenced by the letter 3 or 4 positions back; the H is consumed
- result.append('F');
- index++;
- }
- } else if (nextCh == 'N') {
- char nextnextnextCh;
- if (index + 3 < length) {
- nextnextnextCh = toUpperCase(string.charAt(index + 3));
- } else {
- nextnextnextCh = 0;
- }
- if (nextnextCh < 'A' || nextnextCh > 'Z') { // GN followed by a non-letter or the end: G emits nothing
- } else if (nextnextCh == 'E' && nextnextnextCh == 'D') { // GNED: G emits nothing
- } else {
- result.append('K');
- }
- } else if (prevCh == 'G') { // unreachable for "GG": doubled non-C letters were already skipped above
- result.append('K');
- } else {
- switch (nextCh) {
- case 'E':
- case 'I':
- case 'Y':
- // makesoft
- result.append('J');
- break;
- default:
- result.append('K');
- break;
- }
- }
- break;
- case 'H':
- case 'W':
- case 'Y':
- switch (nextCh) {
- case 'A':
- case 'E':
- case 'I':
- case 'O':
- case 'U':
- // followed by a vowel
- if (ch == 'H') {
- switch (prevCh) {
- case 'C':
- case 'G':
- case 'P':
- case 'S':
- case 'T':
- // affecth
- break; // H after C/G/P/S/T emits nothing
- default:
- result.append('H');
- break;
- }
- } else {
- result.append(ch);
- }
- break;
- default:
- // not followed by a vowel
- break;
- }
- break;
- case 'K':
- if (prevCh != 'C') { // CK: the K emits nothing
- result.append('K');
- }
- break;
- case 'P':
- if (nextCh == 'H') { // PH -> F; the H is not consumed here, it is silenced later by the prevCh == 'P' rule
- result.append('F');
- } else {
- result.append('P');
- }
- break;
- case 'Q':
- result.append('K');
- break;
- case 'S':
- if (nextCh == 'I' && (nextnextCh == 'O' || nextnextCh == 'A')) { // SIO/SIA -> X
- result.append('X');
- } else if (nextCh == 'H') { // SH -> X, the H is consumed
- result.append('X');
- index++;
- } else {
- result.append('S');
- }
- break;
- case 'T':
- if (nextCh == 'I' && (nextnextCh == 'O' || nextnextCh == 'A')) { // TIO/TIA -> X
- result.append('X');
- } else if (nextCh == 'H') { // TH -> the digit '0' (zero), the H is consumed
- result.append('0');
- index++;
- } else {
- result.append('T');
- }
- break;
- case 'V':
- result.append('F');
- break;
- case 'X':
- result.append('K'); // X -> KS
- result.append('S');
- break;
- case 'Z':
- result.append('S');
- break;
- case 'F':
- case 'J':
- case 'L':
- case 'M':
- case 'N':
- case 'R':
- result.append(ch); // these letters map to themselves
- break;
- default: // vowels (and anything not listed) emit nothing after the first letter
- break;
- }
- }
- return result.toString();
- }
- /**
- * Returns a formatted money value.
- * XXX: locale charset
- *
- * @param format the format
- * @param value the value
- * @return a string of formatted values
- */
- public static String money_format(Env env, String format, double value) {
- BiancaLocale monetary = env.getLocaleInfo().getMonetary();
- Locale locale = monetary.getLocale();
- return NumberFormat.getCurrencyInstance(locale).format(value);
- }
- /**
- * TODO: finish implementation of nl_langinfo
- */
- public static String nl_langinfo(Env env, int item) {
- BiancaLocale money = env.getLocaleInfo().getMonetary();
- Locale locale = money.getLocale();
- DecimalFormatSymbols decimal = new DecimalFormatSymbols(locale);
- Currency currency = NumberFormat.getInstance(locale).getCurrency();
- String result;
- switch (item) {
- default:
- result = Boolean.FALSE.toString();
- break;
- }
- return result;
- }
- /**
- * Returns a formatted number.
- *
- * @param value the value
- * @param decimals the number of decimals
- * @param pointValue the decimal point string
- * @param groupValue the thousands separator
- * @return a string of the formatted number
- */
- public static String number_format(Env env,
- double value,
- @Optional int decimals,
- @Optional Value pointValue,
- @Optional Value groupValue) {
- boolean isGroupDefault = (groupValue instanceof DefaultValue);
- boolean isPointDefault = (pointValue instanceof DefaultValue);
- if (!isPointDefault && isGroupDefault) {
- env.warning(L.l("wrong parameter count"));
- return null;
- }
- String pattern;
- char point = '.';
- if (!pointValue.isNull()) {
- String pointString = pointValue.toString();
- point = (pointString.length() == 0) ? 0 : pointString.charAt(0);
- }
- char group = ',';
- if (!groupValue.isNull()) {
- String groupString = groupValue.toString();
- group = (groupString.length() == 0) ? 0 : groupString.charAt(0);
- }
- if (decimals > 0) {
- StringBuilder patternBuilder = new StringBuilder(6 + decimals);
- patternBuilder.append(group == 0 ? "###0." : "#,##0.");
- for (int i = 0; i < decimals; i++) {
- patternBuilder.append('0');
- }
- pattern = patternBuilder.toString();
- } else {
- pattern = group == 0 ? "###0" : "#,##0";
- }
- DecimalFormatSymbols decimalFormatSymbols;
- if (point == '.' && group == ',') {
- decimalFormatSymbols = DEFAULT_DECIMAL_FORMAT_SYMBOLS;
- } else {
- decimalFormatSymbols = new DecimalFormatSymbols();
- decimalFormatSymbols.setDecimalSeparator(point);
- decimalFormatSymbols.setGroupingSeparator(group);
- decimalFormatSymbols.setZeroDigit('0');
- }
- DecimalFormat format = new DecimalFormat(pattern, decimalFormatSymbols);
- String result = format.format(value);
- if (point == 0 && decimals > 0) {
- // no way to get DecimalFormat to output nothing for the point,
- // so remove it here
- int i = result.lastIndexOf(point);
- return result.substring(0, i) + result.substring(i + 1, result.length());
- } else {
- return result;
- }
- }
- /**
- * Converts the first character to an integer.
- *
- * @param string the string to be converted
- * @return the integer value
- */
- public static long ord(StringValue string) {
- if (string.length() == 0) {
- return 0;
- } else {
- return string.charAt(0);
- }
- }
- /**
- * Parses the string as a query string.
- *
- * @param env the calling environment
- * @param str the query string
- * @param array the optional result array
- */
- @UsesSymbolTable
- public static Value parse_str(Env env, StringValue str,
- @Optional @Reference Value ref) {
- boolean isRef = ref instanceof Var;
- ArrayValue result = null;
- if (isRef) {
- result = new ArrayValueImpl();
- ref.set(result);
- } else if (ref instanceof ArrayValue) {
- result = (ArrayValue) ref;
- isRef = true;
- } else {
- result = new ArrayValueImpl();
- }
- return StringUtility.parseStr(env,
- str,
- result,
- isRef,
- env.getHttpInputEncoding());
- }
- /**
- * Prints the string.
- *
- * @param env the bianca environment
- * @param value the string to print
- */
- public static long print(Env env, Value value) {
- value.print(env);
- return 1;
- }
- /**
- * print to the output with a formatter
- *
- * @param env the bianca environment
- * @param format the format string
- * @param args the format arguments
- * @return the formatted string
- */
- public static int printf(Env env, StringValue format, Value[] args) {
- Value str = sprintf(env, format, args);
- str.print(env);
- return str.length();
- }
- /**
- * Converts a RFC2045 quoted printable string to a string.
- */
- // TODO: i18n
- public static String quoted_printable_decode(String str) {
- if (str == null) {
- str = "";
- }
- StringBuilder sb = new StringBuilder();
- int length = str.length();
- for (int i = 0; i < length; i++) {
- char ch = str.charAt(i);
- if (33 <= ch && ch <= 60) {
- sb.append(ch);
- } else if (62 <= ch && ch <= 126) {
- sb.append(ch);
- } else if (ch == ' ' || ch == '\t') {
- if (i + 1 < str.length()
- && (str.charAt(i + 1) == '\r' || str.charAt(i + 1) == '\n')) {
- sb.append('=');
- sb.append(toUpperHexChar(ch >> 4));
- sb.append(toUpperHexChar(ch));
- } else {
- sb.append(ch);
- }
- } else if (ch == '\r' || ch == '\n') {
- sb.append(ch);
- } else {
- sb.append('=');
- sb.append(toUpperHexChar(ch >> 4));
- sb.append(toUpperHexChar(ch));
- }
- }
- return sb.toString();
- }
- /**
- * Escapes meta characters.
- *
- * @param string the string to be quoted
- * @return the quoted
- */
- public static Value quotemeta(StringValue string) {
- int len = string.length();
- StringValue sb = new StringValue();
- for (int i = 0; i < len; i++) {
- char ch = string.charAt(i);
- switch (ch) {
- case '.':
- case '\\':
- case '+':
- case '*':
- case '?':
- case '[':
- case '^':
- case ']':
- case '(':
- case ')':
- case '$':
- sb.append("\\");
- sb.append(ch);
- break;
- default:
- sb.append(ch);
- break;
- }
- }
- return sb;
- }
/**
 * Lookup table indexed by character code (0-255); an entry is true when the
 * character is stripped by default, i.e. when no explicit character list
 * is given to the trim functions.
 */
private static final boolean[] TRIM_WHITESPACE = new boolean[256];

static {
    // NUL, backspace, space, tab, CR, LF, vertical tab
    // NOTE(review): PHP documents the default set as " \t\n\r\0\x0B";
    // '\b' is additionally included here -- confirm this is intentional.
    for (char ch : new char[]{'\0', '\b', ' ', '\t', '\r', '\n', 0x0B}) {
        TRIM_WHITESPACE[ch] = true;
    }
}
- /**
- * Removes trailing whitespace.
- *
- * @param env the bianca environment
- * @param string the string to be trimmed
- * @param characters optional set of characters to trim
- * @return the trimmed string
- */
- public static StringValue rtrim(Env env,
- StringValue string,
- @Optional String characters) {
- if (characters == null) {
- characters = "";
- }
- boolean[] trim;
- if (characters.equals("")) {
- trim = TRIM_WHITESPACE;
- } else {
- trim = parseCharsetBitmap(env, characters);
- }
- for (int i = string.length() - 1; i >= 0; i--) {
- char ch = string.charAt(i);
- if (ch >= 256 || !trim[ch]) {
- if (i == string.length()) {
- return string;
- } else {
- return (StringValue) string.subSequence(0, i + 1);
- }
- }
- }
- return StringValue.EMPTY;
- }
- /**
- * Sets locale configuration.
- */
- public static Value setlocale(Env env,
- int category,
- Value localeArg,
- Value[] fallback) {
- LocaleInfo localeInfo = env.getLocaleInfo();
- if (localeArg instanceof ArrayValue) {
- for (Value value : ((ArrayValue) localeArg).values()) {
- BiancaLocale locale = setLocale(localeInfo,
- category,
- value.toString());
- if (locale != null) {
- return env.createString(locale.toString());
- }
- }
- } else {
- BiancaLocale locale = setLocale(localeInfo,
- category,
- localeArg.toString());
- if (locale != null) {
- return env.createString(locale.toString());
- }
- }
- for (int i = 0; i < fallback.length; i++) {
- BiancaLocale locale = setLocale(localeInfo,
- category,
- fallback[i].toString());
- if (locale != null) {
- return env.createString(locale.toString());
- }
- }
- return BooleanValue.FALSE;
- }
- /**
- * Sets locale configuration.
- */
- private static BiancaLocale setLocale(LocaleInfo localeInfo,
- int category,
- String localeName) {
- BiancaLocale locale = findLocale(localeName);
- if (locale == null) {
- return null;
- }
- switch (category) {
- case LC_ALL:
- localeInfo.setAll(locale);
- return localeInfo.getMessages();
- case LC_COLLATE:
- localeInfo.setCollate(locale);
- return localeInfo.getCollate();
- case LC_CTYPE:
- localeInfo.setCtype(locale);
- return localeInfo.getCtype();
- case LC_MONETARY:
- localeInfo.setMonetary(locale);
- return localeInfo.getMonetary();
- case LC_NUMERIC:
- localeInfo.setNumeric(locale);
- return localeInfo.getNumeric();
- case LC_TIME:
- localeInfo.setTime(locale);
- return localeInfo.getTime();
- case LC_MESSAGES:
- localeInfo.setMessages(locale);
- return localeInfo.getMessages();
- default:
- return null;
- }
- }
- /*
- * Example locale: fr_FR.UTF-8@euro, french (on Windows)
- * (French, France, UTF-8, with euro currency support)
- */
- private static BiancaLocale findLocale(String localeName) {
- String language;
- String country;
- String charset = null;
- String variant = null;
- CharBuffer sb = CharBuffer.allocate();
- int len = localeName.length();
- int i = 0;
- char ch = 0;
- while (i < len && (ch = localeName.charAt(i++)) != '-' && ch != '_') {
- sb.append(ch);
- }
- language = sb.toString();
- sb.clear();
- while (i < len && (ch = localeName.charAt(i)) != '.' && ch != '@') {
- sb.append(ch);
- i++;
- }
- if (ch == '.') {
- i++;
- }
- country = sb.toString();
- sb.clear();
- while (i < len && (ch = localeName.charAt(i)) != '@') {
- sb.append(ch);
- i++;
- }
- if (sb.length() > 0) {
- charset = sb.toString();
- }
- if (i + 1 < len) {
- variant = localeName.substring(i + 1);
- }
- Locale locale;
- // java versions >= 1.5 should automatically use the euro sign
- if (variant != null && !variant.equalsIgnoreCase("euro")) {
- locale = new Locale(language, country, variant);
- } else if (country != null) {
- locale = new Locale(language, country);
- } else {
- locale = new Locale(language);
- }
- if (isValidLocale(locale)) {
- return new BiancaLocale(locale, charset);
- } else {
- return null;
- }
- }
- /**
- * Returns true if the locale is supported.
- */
- private static boolean isValidLocale(Locale locale) {
- Locale[] validLocales = Locale.getAvailableLocales();
- for (int i = 0; i < validLocales.length; i++) {
- if (validLocales[i].equals(locale)) {
- return true;
- }
- }
- return false;
- }
- /**
- * returns the md5 hash
- *
- * @param source the string
- * @param rawOutput if true, return the raw binary
- * @return a string of imploded values
- */
- public static Value sha1(Env env,
- String source,
- @Optional boolean rawOutput) {
- if (source == null…
Large files files are truncated, but you can click here to view the full file