/core/src/library/utils/R/adist.R

http://renjin.googlecode.com/ · R · 82 lines · 69 code · 10 blank · 3 comment · 21 complexity · a9a7ee640e75b88e48d391503c7e18d2 MD5 · raw file

  1. adist <-
  2. function(x, y = NULL, costs = NULL, counts = FALSE, fixed = TRUE,
  3. partial = !fixed, ignore.case = FALSE, useBytes = FALSE)
  4. {
  5. bytesToInt <- function(x) {
  6. if(is.na(x)) return(NA_integer_)
  7. as.integer(charToRaw(x))
  8. }
  9. costs <- .amatch_costs(costs)
  10. nmx <- names(x)
  11. x <- as.character(x)
  12. names(x) <- nmx
  13. if(!is.null(y)) {
  14. nmy <- names(y)
  15. y <- as.character(y)
  16. names(y) <- nmy
  17. }
  18. if(!identical(fixed, FALSE) && !identical(partial, TRUE)) {
  19. ex <- Encoding(x)
  20. useBytes <- identical(useBytes, TRUE) || any(ex == "bytes")
  21. if(!is.null(y)) {
  22. ey <- Encoding(y)
  23. useBytes <- useBytes || any(ey == "bytes")
  24. }
  25. if(useBytes) {
  26. x <- lapply(x, bytesToInt)
  27. y <- if(is.null(y)) {
  28. x
  29. } else {
  30. lapply(y, bytesToInt)
  31. }
  32. } else {
  33. ignore.case <- identical(ignore.case, TRUE)
  34. x <- if(ignore.case) {
  35. lapply(tolower(enc2utf8(x)), utf8ToInt)
  36. } else {
  37. lapply(enc2utf8(x), utf8ToInt)
  38. }
  39. y <- if(is.null(y)) {
  40. x
  41. } else if(ignore.case) {
  42. lapply(tolower(enc2utf8(y)), utf8ToInt)
  43. } else {
  44. lapply(enc2utf8(y), utf8ToInt)
  45. }
  46. }
  47. }
  48. else {
  49. if(is.null(y)) {
  50. y <- x
  51. }
  52. ## TRE needs integer costs: coerce here for simplicity.
  53. costs <- as.integer(costs)
  54. }
  55. .Internal(adist(x, y, costs, counts, fixed, partial, ignore.case,
  56. useBytes))
  57. }
  58. aregexec <-
  59. function(pattern, text, max.distance = 0.1, costs = NULL,
  60. ignore.case = FALSE, fixed = FALSE, useBytes = FALSE)
  61. {
  62. ## TRE needs integer costs: coerce here for simplicity.
  63. costs <- as.integer(.amatch_costs(costs))
  64. bounds <- .amatch_bounds(max.distance)
  65. .Internal(aregexec(as.character(pattern),
  66. as.character(text),
  67. bounds, costs, ignore.case, fixed, useBytes))
  68. }
  69. ## No longer used by adist(), but could be more generally useful ...
  70. regquote <-
  71. function(x)
  72. gsub("([*.?+^&\\[])", "\\\\\\1", x)