PageRenderTime 35ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/libgo/go/strings/strings.go

https://bitbucket.org/pizzafactory/pf-gcc
Go | 559 lines | 405 code | 47 blank | 107 comment | 147 complexity | 09aea55ba9faaadb8b3183d2a77c9567 MD5 | raw file
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // A package of simple functions to manipulate strings.
  5. package strings
  6. import (
  7. "unicode"
  8. "utf8"
  9. )
  10. // explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings) up to a maximum of n (n < 0 means no limit).
  11. // Invalid UTF-8 sequences become correct encodings of U+FFF8.
  12. func explode(s string, n int) []string {
  13. if n == 0 {
  14. return nil
  15. }
  16. l := utf8.RuneCountInString(s)
  17. if n <= 0 || n > l {
  18. n = l
  19. }
  20. a := make([]string, n)
  21. var size, rune int
  22. i, cur := 0, 0
  23. for ; i+1 < n; i++ {
  24. rune, size = utf8.DecodeRuneInString(s[cur:])
  25. a[i] = string(rune)
  26. cur += size
  27. }
  28. // add the rest, if there is any
  29. if cur < len(s) {
  30. a[i] = s[cur:]
  31. }
  32. return a
  33. }
  34. // Count counts the number of non-overlapping instances of sep in s.
  35. func Count(s, sep string) int {
  36. if sep == "" {
  37. return utf8.RuneCountInString(s) + 1
  38. }
  39. c := sep[0]
  40. l := len(sep)
  41. n := 0
  42. if l == 1 {
  43. // special case worth making fast
  44. for i := 0; i < len(s); i++ {
  45. if s[i] == c {
  46. n++
  47. }
  48. }
  49. return n
  50. }
  51. for i := 0; i+l <= len(s); i++ {
  52. if s[i] == c && s[i:i+l] == sep {
  53. n++
  54. i += l - 1
  55. }
  56. }
  57. return n
  58. }
  59. // Contains returns true if substr is within s.
  60. func Contains(s, substr string) bool {
  61. return Index(s, substr) != -1
  62. }
  63. // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
  64. func Index(s, sep string) int {
  65. n := len(sep)
  66. if n == 0 {
  67. return 0
  68. }
  69. c := sep[0]
  70. if n == 1 {
  71. // special case worth making fast
  72. for i := 0; i < len(s); i++ {
  73. if s[i] == c {
  74. return i
  75. }
  76. }
  77. return -1
  78. }
  79. // n > 1
  80. for i := 0; i+n <= len(s); i++ {
  81. if s[i] == c && s[i:i+n] == sep {
  82. return i
  83. }
  84. }
  85. return -1
  86. }
  87. // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
  88. func LastIndex(s, sep string) int {
  89. n := len(sep)
  90. if n == 0 {
  91. return len(s)
  92. }
  93. c := sep[0]
  94. if n == 1 {
  95. // special case worth making fast
  96. for i := len(s) - 1; i >= 0; i-- {
  97. if s[i] == c {
  98. return i
  99. }
  100. }
  101. return -1
  102. }
  103. // n > 1
  104. for i := len(s) - n; i >= 0; i-- {
  105. if s[i] == c && s[i:i+n] == sep {
  106. return i
  107. }
  108. }
  109. return -1
  110. }
  111. // IndexRune returns the index of the first instance of the Unicode code point
  112. // rune, or -1 if rune is not present in s.
  113. func IndexRune(s string, rune int) int {
  114. for i, c := range s {
  115. if c == rune {
  116. return i
  117. }
  118. }
  119. return -1
  120. }
  121. // IndexAny returns the index of the first instance of any Unicode code point
  122. // from chars in s, or -1 if no Unicode code point from chars is present in s.
  123. func IndexAny(s, chars string) int {
  124. if len(chars) > 0 {
  125. for i, c := range s {
  126. for _, m := range chars {
  127. if c == m {
  128. return i
  129. }
  130. }
  131. }
  132. }
  133. return -1
  134. }
  135. // LastIndexAny returns the index of the last instance of any Unicode code
  136. // point from chars in s, or -1 if no Unicode code point from chars is
  137. // present in s.
  138. func LastIndexAny(s, chars string) int {
  139. if len(chars) > 0 {
  140. for i := len(s); i > 0; {
  141. rune, size := utf8.DecodeLastRuneInString(s[0:i])
  142. i -= size
  143. for _, m := range chars {
  144. if rune == m {
  145. return i
  146. }
  147. }
  148. }
  149. }
  150. return -1
  151. }
  152. // Generic split: splits after each instance of sep,
  153. // including sepSave bytes of sep in the subarrays.
  154. func genSplit(s, sep string, sepSave, n int) []string {
  155. if n == 0 {
  156. return nil
  157. }
  158. if sep == "" {
  159. return explode(s, n)
  160. }
  161. if n < 0 {
  162. n = Count(s, sep) + 1
  163. }
  164. c := sep[0]
  165. start := 0
  166. a := make([]string, n)
  167. na := 0
  168. for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
  169. if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
  170. a[na] = s[start : i+sepSave]
  171. na++
  172. start = i + len(sep)
  173. i += len(sep) - 1
  174. }
  175. }
  176. a[na] = s[start:]
  177. return a[0 : na+1]
  178. }
  179. // Split slices s into substrings separated by sep and returns a slice of
  180. // the substrings between those separators.
  181. // If sep is empty, Split splits after each UTF-8 sequence.
  182. // The count determines the number of substrings to return:
  183. // n > 0: at most n substrings; the last substring will be the unsplit remainder.
  184. // n == 0: the result is nil (zero substrings)
  185. // n < 0: all substrings
  186. func Split(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
  187. // SplitAfter slices s into substrings after each instance of sep and
  188. // returns a slice of those substrings.
  189. // If sep is empty, Split splits after each UTF-8 sequence.
  190. // The count determines the number of substrings to return:
  191. // n > 0: at most n substrings; the last substring will be the unsplit remainder.
  192. // n == 0: the result is nil (zero substrings)
  193. // n < 0: all substrings
  194. func SplitAfter(s, sep string, n int) []string {
  195. return genSplit(s, sep, len(sep), n)
  196. }
  197. // Fields splits the string s around each instance of one or more consecutive white space
  198. // characters, returning an array of substrings of s or an empty list if s contains only white space.
  199. func Fields(s string) []string {
  200. return FieldsFunc(s, unicode.IsSpace)
  201. }
  202. // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
  203. // and returns an array of slices of s. If all code points in s satisfy f(c) or the
  204. // string is empty, an empty slice is returned.
  205. func FieldsFunc(s string, f func(int) bool) []string {
  206. // First count the fields.
  207. n := 0
  208. inField := false
  209. for _, rune := range s {
  210. wasInField := inField
  211. inField = !f(rune)
  212. if inField && !wasInField {
  213. n++
  214. }
  215. }
  216. // Now create them.
  217. a := make([]string, n)
  218. na := 0
  219. fieldStart := -1 // Set to -1 when looking for start of field.
  220. for i, rune := range s {
  221. if f(rune) {
  222. if fieldStart >= 0 {
  223. a[na] = s[fieldStart:i]
  224. na++
  225. fieldStart = -1
  226. }
  227. } else if fieldStart == -1 {
  228. fieldStart = i
  229. }
  230. }
  231. if fieldStart != -1 { // Last field might end at EOF.
  232. a[na] = s[fieldStart:]
  233. }
  234. return a
  235. }
  236. // Join concatenates the elements of a to create a single string. The separator string
  237. // sep is placed between elements in the resulting string.
  238. func Join(a []string, sep string) string {
  239. if len(a) == 0 {
  240. return ""
  241. }
  242. if len(a) == 1 {
  243. return a[0]
  244. }
  245. n := len(sep) * (len(a) - 1)
  246. for i := 0; i < len(a); i++ {
  247. n += len(a[i])
  248. }
  249. b := make([]byte, n)
  250. bp := 0
  251. for i := 0; i < len(a); i++ {
  252. s := a[i]
  253. for j := 0; j < len(s); j++ {
  254. b[bp] = s[j]
  255. bp++
  256. }
  257. if i+1 < len(a) {
  258. s = sep
  259. for j := 0; j < len(s); j++ {
  260. b[bp] = s[j]
  261. bp++
  262. }
  263. }
  264. }
  265. return string(b)
  266. }
  267. // HasPrefix tests whether the string s begins with prefix.
  268. func HasPrefix(s, prefix string) bool {
  269. return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
  270. }
  271. // HasSuffix tests whether the string s ends with suffix.
  272. func HasSuffix(s, suffix string) bool {
  273. return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
  274. }
  275. // Map returns a copy of the string s with all its characters modified
  276. // according to the mapping function. If mapping returns a negative value, the character is
  277. // dropped from the string with no replacement.
  278. func Map(mapping func(rune int) int, s string) string {
  279. // In the worst case, the string can grow when mapped, making
  280. // things unpleasant. But it's so rare we barge in assuming it's
  281. // fine. It could also shrink but that falls out naturally.
  282. maxbytes := len(s) // length of b
  283. nbytes := 0 // number of bytes encoded in b
  284. b := make([]byte, maxbytes)
  285. for _, c := range s {
  286. rune := mapping(c)
  287. if rune >= 0 {
  288. wid := 1
  289. if rune >= utf8.RuneSelf {
  290. wid = utf8.RuneLen(rune)
  291. }
  292. if nbytes+wid > maxbytes {
  293. // Grow the buffer.
  294. maxbytes = maxbytes*2 + utf8.UTFMax
  295. nb := make([]byte, maxbytes)
  296. copy(nb, b[0:nbytes])
  297. b = nb
  298. }
  299. nbytes += utf8.EncodeRune(b[nbytes:maxbytes], rune)
  300. }
  301. }
  302. return string(b[0:nbytes])
  303. }
  304. // Repeat returns a new string consisting of count copies of the string s.
  305. func Repeat(s string, count int) string {
  306. b := make([]byte, len(s)*count)
  307. bp := 0
  308. for i := 0; i < count; i++ {
  309. for j := 0; j < len(s); j++ {
  310. b[bp] = s[j]
  311. bp++
  312. }
  313. }
  314. return string(b)
  315. }
  316. // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
  317. func ToUpper(s string) string { return Map(unicode.ToUpper, s) }
  318. // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case.
  319. func ToLower(s string) string { return Map(unicode.ToLower, s) }
  320. // ToTitle returns a copy of the string s with all Unicode letters mapped to their title case.
  321. func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
  322. // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
  323. // upper case, giving priority to the special casing rules.
  324. func ToUpperSpecial(_case unicode.SpecialCase, s string) string {
  325. return Map(func(r int) int { return _case.ToUpper(r) }, s)
  326. }
  327. // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
  328. // lower case, giving priority to the special casing rules.
  329. func ToLowerSpecial(_case unicode.SpecialCase, s string) string {
  330. return Map(func(r int) int { return _case.ToLower(r) }, s)
  331. }
  332. // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
  333. // title case, giving priority to the special casing rules.
  334. func ToTitleSpecial(_case unicode.SpecialCase, s string) string {
  335. return Map(func(r int) int { return _case.ToTitle(r) }, s)
  336. }
  337. // isSeparator reports whether the rune could mark a word boundary.
  338. // TODO: update when package unicode captures more of the properties.
  339. func isSeparator(rune int) bool {
  340. // ASCII alphanumerics and underscore are not separators
  341. if rune <= 0x7F {
  342. switch {
  343. case '0' <= rune && rune <= '9':
  344. return false
  345. case 'a' <= rune && rune <= 'z':
  346. return false
  347. case 'A' <= rune && rune <= 'Z':
  348. return false
  349. case rune == '_':
  350. return false
  351. }
  352. return true
  353. }
  354. // Letters and digits are not separators
  355. if unicode.IsLetter(rune) || unicode.IsDigit(rune) {
  356. return false
  357. }
  358. // Otherwise, all we can do for now is treat spaces as separators.
  359. return unicode.IsSpace(rune)
  360. }
  361. // BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
  362. // Title returns a copy of the string s with all Unicode letters that begin words
  363. // mapped to their title case.
  364. func Title(s string) string {
  365. // Use a closure here to remember state.
  366. // Hackish but effective. Depends on Map scanning in order and calling
  367. // the closure once per rune.
  368. prev := ' '
  369. return Map(
  370. func(r int) int {
  371. if isSeparator(prev) {
  372. prev = r
  373. return unicode.ToTitle(r)
  374. }
  375. prev = r
  376. return r
  377. },
  378. s)
  379. }
  380. // TrimLeftFunc returns a slice of the string s with all leading
  381. // Unicode code points c satisfying f(c) removed.
  382. func TrimLeftFunc(s string, f func(r int) bool) string {
  383. i := indexFunc(s, f, false)
  384. if i == -1 {
  385. return ""
  386. }
  387. return s[i:]
  388. }
  389. // TrimRightFunc returns a slice of the string s with all trailing
  390. // Unicode code points c satisfying f(c) removed.
  391. func TrimRightFunc(s string, f func(r int) bool) string {
  392. i := lastIndexFunc(s, f, false)
  393. if i >= 0 && s[i] >= utf8.RuneSelf {
  394. _, wid := utf8.DecodeRuneInString(s[i:])
  395. i += wid
  396. } else {
  397. i++
  398. }
  399. return s[0:i]
  400. }
  401. // TrimFunc returns a slice of the string s with all leading
  402. // and trailing Unicode code points c satisfying f(c) removed.
  403. func TrimFunc(s string, f func(r int) bool) string {
  404. return TrimRightFunc(TrimLeftFunc(s, f), f)
  405. }
  406. // IndexFunc returns the index into s of the first Unicode
  407. // code point satisfying f(c), or -1 if none do.
  408. func IndexFunc(s string, f func(r int) bool) int {
  409. return indexFunc(s, f, true)
  410. }
  411. // LastIndexFunc returns the index into s of the last
  412. // Unicode code point satisfying f(c), or -1 if none do.
  413. func LastIndexFunc(s string, f func(r int) bool) int {
  414. return lastIndexFunc(s, f, true)
  415. }
  416. // indexFunc is the same as IndexFunc except that if
  417. // truth==false, the sense of the predicate function is
  418. // inverted.
  419. func indexFunc(s string, f func(r int) bool, truth bool) int {
  420. start := 0
  421. for start < len(s) {
  422. wid := 1
  423. rune := int(s[start])
  424. if rune >= utf8.RuneSelf {
  425. rune, wid = utf8.DecodeRuneInString(s[start:])
  426. }
  427. if f(rune) == truth {
  428. return start
  429. }
  430. start += wid
  431. }
  432. return -1
  433. }
  434. // lastIndexFunc is the same as LastIndexFunc except that if
  435. // truth==false, the sense of the predicate function is
  436. // inverted.
  437. func lastIndexFunc(s string, f func(r int) bool, truth bool) int {
  438. for i := len(s); i > 0; {
  439. rune, size := utf8.DecodeLastRuneInString(s[0:i])
  440. i -= size
  441. if f(rune) == truth {
  442. return i
  443. }
  444. }
  445. return -1
  446. }
  447. func makeCutsetFunc(cutset string) func(rune int) bool {
  448. return func(rune int) bool { return IndexRune(cutset, rune) != -1 }
  449. }
  450. // Trim returns a slice of the string s with all leading and
  451. // trailing Unicode code points contained in cutset removed.
  452. func Trim(s string, cutset string) string {
  453. if s == "" || cutset == "" {
  454. return s
  455. }
  456. return TrimFunc(s, makeCutsetFunc(cutset))
  457. }
  458. // TrimLeft returns a slice of the string s with all leading
  459. // Unicode code points contained in cutset removed.
  460. func TrimLeft(s string, cutset string) string {
  461. if s == "" || cutset == "" {
  462. return s
  463. }
  464. return TrimLeftFunc(s, makeCutsetFunc(cutset))
  465. }
  466. // TrimRight returns a slice of the string s, with all trailing
  467. // Unicode code points contained in cutset removed.
  468. func TrimRight(s string, cutset string) string {
  469. if s == "" || cutset == "" {
  470. return s
  471. }
  472. return TrimRightFunc(s, makeCutsetFunc(cutset))
  473. }
  474. // TrimSpace returns a slice of the string s, with all leading
  475. // and trailing white space removed, as defined by Unicode.
  476. func TrimSpace(s string) string {
  477. return TrimFunc(s, unicode.IsSpace)
  478. }
  479. // Replace returns a copy of the string s with the first n
  480. // non-overlapping instances of old replaced by new.
  481. // If n < 0, there is no limit on the number of replacements.
  482. func Replace(s, old, new string, n int) string {
  483. if old == new || n == 0 {
  484. return s // avoid allocation
  485. }
  486. // Compute number of replacements.
  487. if m := Count(s, old); m == 0 {
  488. return s // avoid allocation
  489. } else if n < 0 || m < n {
  490. n = m
  491. }
  492. // Apply replacements to buffer.
  493. t := make([]byte, len(s)+n*(len(new)-len(old)))
  494. w := 0
  495. start := 0
  496. for i := 0; i < n; i++ {
  497. j := start
  498. if len(old) == 0 {
  499. if i > 0 {
  500. _, wid := utf8.DecodeRuneInString(s[start:])
  501. j += wid
  502. }
  503. } else {
  504. j += Index(s[start:], old)
  505. }
  506. w += copy(t[w:], s[start:j])
  507. w += copy(t[w:], new)
  508. start = j + len(old)
  509. }
  510. w += copy(t[w:], s[start:])
  511. return string(t[0:w])
  512. }