PageRenderTime 60ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/core/strings.py

https://github.com/ajcr/pandas
Python | 1034 lines | 1009 code | 9 blank | 16 comment | 6 complexity | fb1cbcd96fbfca30467682f58b50482c MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import numpy as np
  2. from pandas.compat import zip
  3. from pandas.core.common import isnull, _values_from_object
  4. from pandas.core.series import Series
  5. from pandas.core.frame import DataFrame
  6. import pandas.compat as compat
  7. import re
  8. import pandas.lib as lib
  9. import warnings
  10. import textwrap
  11. def _get_array_list(arr, others):
  12. if len(others) and isinstance(others[0], (list, np.ndarray)):
  13. arrays = [arr] + list(others)
  14. else:
  15. arrays = [arr, others]
  16. return [np.asarray(x, dtype=object) for x in arrays]
  17. def str_cat(arr, others=None, sep=None, na_rep=None):
  18. """
  19. Concatenate arrays of strings with given separator
  20. Parameters
  21. ----------
  22. arr : list or array-like
  23. others : list or array, or list of arrays
  24. sep : string or None, default None
  25. na_rep : string or None, default None
  26. If None, an NA in any array will propagate
  27. Returns
  28. -------
  29. concat : array
  30. """
  31. if sep is None:
  32. sep = ''
  33. if others is not None:
  34. arrays = _get_array_list(arr, others)
  35. n = _length_check(arrays)
  36. masks = np.array([isnull(x) for x in arrays])
  37. cats = None
  38. if na_rep is None:
  39. na_mask = np.logical_or.reduce(masks, axis=0)
  40. result = np.empty(n, dtype=object)
  41. np.putmask(result, na_mask, np.nan)
  42. notmask = ~na_mask
  43. tuples = zip(*[x[notmask] for x in arrays])
  44. cats = [sep.join(tup) for tup in tuples]
  45. result[notmask] = cats
  46. else:
  47. for i, x in enumerate(arrays):
  48. x = np.where(masks[i], na_rep, x)
  49. if cats is None:
  50. cats = x
  51. else:
  52. cats = cats + sep + x
  53. result = cats
  54. return result
  55. else:
  56. arr = np.asarray(arr, dtype=object)
  57. mask = isnull(arr)
  58. if na_rep is None and mask.any():
  59. return np.nan
  60. return sep.join(np.where(mask, na_rep, arr))
  61. def _length_check(others):
  62. n = None
  63. for x in others:
  64. if n is None:
  65. n = len(x)
  66. elif len(x) != n:
  67. raise ValueError('All arrays must be same length')
  68. return n
  69. def _na_map(f, arr, na_result=np.nan, dtype=object):
  70. # should really _check_ for NA
  71. return _map(f, arr, na_mask=True, na_value=na_result, dtype=dtype)
  72. def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object):
  73. if not len(arr):
  74. return np.ndarray(0, dtype=dtype)
  75. if isinstance(arr, Series):
  76. arr = arr.values
  77. if not isinstance(arr, np.ndarray):
  78. arr = np.asarray(arr, dtype=object)
  79. if na_mask:
  80. mask = isnull(arr)
  81. try:
  82. result = lib.map_infer_mask(arr, f, mask.view(np.uint8))
  83. except (TypeError, AttributeError):
  84. def g(x):
  85. try:
  86. return f(x)
  87. except (TypeError, AttributeError):
  88. return na_value
  89. return _map(g, arr, dtype=dtype)
  90. if na_value is not np.nan:
  91. np.putmask(result, mask, na_value)
  92. if result.dtype == object:
  93. result = lib.maybe_convert_objects(result)
  94. return result
  95. else:
  96. return lib.map_infer(arr, f)
  97. def str_title(arr):
  98. """
  99. Convert strings to titlecased version
  100. Returns
  101. -------
  102. titled : array
  103. """
  104. return _na_map(lambda x: x.title(), arr)
  105. def str_count(arr, pat, flags=0):
  106. """
  107. Count occurrences of pattern in each string
  108. Parameters
  109. ----------
  110. arr : list or array-like
  111. pat : string, valid regular expression
  112. flags : int, default 0 (no flags)
  113. re module flags, e.g. re.IGNORECASE
  114. Returns
  115. -------
  116. counts : arrays
  117. """
  118. regex = re.compile(pat, flags=flags)
  119. f = lambda x: len(regex.findall(x))
  120. return _na_map(f, arr, dtype=int)
  121. def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
  122. """
  123. Check whether given pattern is contained in each string in the array
  124. Parameters
  125. ----------
  126. pat : string
  127. Character sequence or regular expression
  128. case : boolean, default True
  129. If True, case sensitive
  130. flags : int, default 0 (no flags)
  131. re module flags, e.g. re.IGNORECASE
  132. na : default NaN, fill value for missing values.
  133. regex : bool, default True
  134. If True use re.search, otherwise use Python in operator
  135. Returns
  136. -------
  137. Series of boolean values
  138. See Also
  139. --------
  140. match : analagous, but stricter, relying on re.match instead of re.search
  141. """
  142. if regex:
  143. if not case:
  144. flags |= re.IGNORECASE
  145. regex = re.compile(pat, flags=flags)
  146. if regex.groups > 0:
  147. warnings.warn("This pattern has match groups. To actually get the"
  148. " groups, use str.extract.", UserWarning)
  149. f = lambda x: bool(regex.search(x))
  150. else:
  151. f = lambda x: pat in x
  152. return _na_map(f, arr, na, dtype=bool)
  153. def str_startswith(arr, pat, na=np.nan):
  154. """
  155. Return boolean array indicating whether each string starts with passed
  156. pattern
  157. Parameters
  158. ----------
  159. pat : string
  160. Character sequence
  161. na : bool, default NaN
  162. Returns
  163. -------
  164. startswith : array (boolean)
  165. """
  166. f = lambda x: x.startswith(pat)
  167. return _na_map(f, arr, na, dtype=bool)
  168. def str_endswith(arr, pat, na=np.nan):
  169. """
  170. Return boolean array indicating whether each string ends with passed
  171. pattern
  172. Parameters
  173. ----------
  174. pat : string
  175. Character sequence
  176. na : bool, default NaN
  177. Returns
  178. -------
  179. endswith : array (boolean)
  180. """
  181. f = lambda x: x.endswith(pat)
  182. return _na_map(f, arr, na, dtype=bool)
  183. def str_lower(arr):
  184. """
  185. Convert strings in array to lowercase
  186. Returns
  187. -------
  188. lowercase : array
  189. """
  190. return _na_map(lambda x: x.lower(), arr)
  191. def str_upper(arr):
  192. """
  193. Convert strings in array to uppercase
  194. Returns
  195. -------
  196. uppercase : array
  197. """
  198. return _na_map(lambda x: x.upper(), arr)
  199. def str_replace(arr, pat, repl, n=-1, case=True, flags=0):
  200. """
  201. Replace
  202. Parameters
  203. ----------
  204. pat : string
  205. Character sequence or regular expression
  206. repl : string
  207. Replacement sequence
  208. n : int, default -1 (all)
  209. Number of replacements to make from start
  210. case : boolean, default True
  211. If True, case sensitive
  212. flags : int, default 0 (no flags)
  213. re module flags, e.g. re.IGNORECASE
  214. Returns
  215. -------
  216. replaced : array
  217. """
  218. use_re = not case or len(pat) > 1 or flags
  219. if use_re:
  220. if not case:
  221. flags |= re.IGNORECASE
  222. regex = re.compile(pat, flags=flags)
  223. n = n if n >= 0 else 0
  224. def f(x):
  225. return regex.sub(repl, x, count=n)
  226. else:
  227. f = lambda x: x.replace(pat, repl, n)
  228. return _na_map(f, arr)
  229. def str_repeat(arr, repeats):
  230. """
  231. Duplicate each string in the array by indicated number of times
  232. Parameters
  233. ----------
  234. repeats : int or array
  235. Same value for all (int) or different value per (array)
  236. Returns
  237. -------
  238. repeated : array
  239. """
  240. if np.isscalar(repeats):
  241. def rep(x):
  242. try:
  243. return compat.binary_type.__mul__(x, repeats)
  244. except TypeError:
  245. return compat.text_type.__mul__(x, repeats)
  246. return _na_map(rep, arr)
  247. else:
  248. def rep(x, r):
  249. try:
  250. return compat.binary_type.__mul__(x, r)
  251. except TypeError:
  252. return compat.text_type.__mul__(x, r)
  253. repeats = np.asarray(repeats, dtype=object)
  254. result = lib.vec_binop(_values_from_object(arr), repeats, rep)
  255. return result
  256. def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=False):
  257. """
  258. Deprecated: Find groups in each string using passed regular expression.
  259. If as_indexer=True, determine if each string matches a regular expression.
  260. Parameters
  261. ----------
  262. pat : string
  263. Character sequence or regular expression
  264. case : boolean, default True
  265. If True, case sensitive
  266. flags : int, default 0 (no flags)
  267. re module flags, e.g. re.IGNORECASE
  268. na : default NaN, fill value for missing values.
  269. as_indexer : False, by default, gives deprecated behavior better achieved
  270. using str_extract. True return boolean indexer.
  271. Returns
  272. -------
  273. Series of boolean values
  274. if as_indexer=True
  275. Series of tuples
  276. if as_indexer=False, default but deprecated
  277. See Also
  278. --------
  279. contains : analagous, but less strict, relying on re.search instead of
  280. re.match
  281. extract : now preferred to the deprecated usage of match (as_indexer=False)
  282. Notes
  283. -----
  284. To extract matched groups, which is the deprecated behavior of match, use
  285. str.extract.
  286. """
  287. if not case:
  288. flags |= re.IGNORECASE
  289. regex = re.compile(pat, flags=flags)
  290. if (not as_indexer) and regex.groups > 0:
  291. # Do this first, to make sure it happens even if the re.compile
  292. # raises below.
  293. warnings.warn("In future versions of pandas, match will change to"
  294. " always return a bool indexer.", UserWarning)
  295. if as_indexer and regex.groups > 0:
  296. warnings.warn("This pattern has match groups. To actually get the"
  297. " groups, use str.extract.", UserWarning)
  298. # If not as_indexer and regex.groups == 0, this returns empty lists
  299. # and is basically useless, so we will not warn.
  300. if (not as_indexer) and regex.groups > 0:
  301. dtype = object
  302. def f(x):
  303. m = regex.match(x)
  304. if m:
  305. return m.groups()
  306. else:
  307. return []
  308. else:
  309. # This is the new behavior of str_match.
  310. dtype = bool
  311. f = lambda x: bool(regex.match(x))
  312. return _na_map(f, arr, na, dtype=dtype)
  313. def _get_single_group_name(rx):
  314. try:
  315. return list(rx.groupindex.keys()).pop()
  316. except IndexError:
  317. return None
  318. def str_extract(arr, pat, flags=0):
  319. """
  320. Find groups in each string using passed regular expression
  321. Parameters
  322. ----------
  323. pat : string
  324. Pattern or regular expression
  325. flags : int, default 0 (no flags)
  326. re module flags, e.g. re.IGNORECASE
  327. Returns
  328. -------
  329. extracted groups : Series (one group) or DataFrame (multiple groups)
  330. Note that dtype of the result is always object, even when no match is
  331. found and the result is a Series or DataFrame containing only NaN
  332. values.
  333. Examples
  334. --------
  335. A pattern with one group will return a Series. Non-matches will be NaN.
  336. >>> Series(['a1', 'b2', 'c3']).str.extract('[ab](\d)')
  337. 0 1
  338. 1 2
  339. 2 NaN
  340. dtype: object
  341. A pattern with more than one group will return a DataFrame.
  342. >>> Series(['a1', 'b2', 'c3']).str.extract('([ab])(\d)')
  343. 0 1
  344. 0 a 1
  345. 1 b 2
  346. 2 NaN NaN
  347. A pattern may contain optional groups.
  348. >>> Series(['a1', 'b2', 'c3']).str.extract('([ab])?(\d)')
  349. 0 1
  350. 0 a 1
  351. 1 b 2
  352. 2 NaN 3
  353. Named groups will become column names in the result.
  354. >>> Series(['a1', 'b2', 'c3']).str.extract('(?P<letter>[ab])(?P<digit>\d)')
  355. letter digit
  356. 0 a 1
  357. 1 b 2
  358. 2 NaN NaN
  359. """
  360. regex = re.compile(pat, flags=flags)
  361. # just to be safe, check this
  362. if regex.groups == 0:
  363. raise ValueError("This pattern contains no groups to capture.")
  364. empty_row = [np.nan]*regex.groups
  365. def f(x):
  366. if not isinstance(x, compat.string_types):
  367. return empty_row
  368. m = regex.search(x)
  369. if m:
  370. return [np.nan if item is None else item for item in m.groups()]
  371. else:
  372. return empty_row
  373. if regex.groups == 1:
  374. result = Series([f(val)[0] for val in arr],
  375. name=_get_single_group_name(regex),
  376. index=arr.index, dtype=object)
  377. else:
  378. names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
  379. columns = [names.get(1 + i, i) for i in range(regex.groups)]
  380. if arr.empty:
  381. result = DataFrame(columns=columns, dtype=object)
  382. else:
  383. result = DataFrame([f(val) for val in arr],
  384. columns=columns,
  385. index=arr.index,
  386. dtype=object)
  387. return result
  388. def str_get_dummies(arr, sep='|'):
  389. """
  390. Split each string by sep and return a frame of dummy/indicator variables.
  391. Examples
  392. --------
  393. >>> Series(['a|b', 'a', 'a|c']).str.get_dummies()
  394. a b c
  395. 0 1 1 0
  396. 1 1 0 0
  397. 2 1 0 1
  398. >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies()
  399. a b c
  400. 0 1 1 0
  401. 1 0 0 0
  402. 2 1 0 1
  403. See also ``pd.get_dummies``.
  404. """
  405. # TODO remove this hack?
  406. arr = arr.fillna('')
  407. try:
  408. arr = sep + arr + sep
  409. except TypeError:
  410. arr = sep + arr.astype(str) + sep
  411. tags = set()
  412. for ts in arr.str.split(sep):
  413. tags.update(ts)
  414. tags = sorted(tags - set([""]))
  415. dummies = np.empty((len(arr), len(tags)), dtype=np.int64)
  416. for i, t in enumerate(tags):
  417. pat = sep + t + sep
  418. dummies[:, i] = lib.map_infer(arr.values, lambda x: pat in x)
  419. return DataFrame(dummies, arr.index, tags)
  420. def str_join(arr, sep):
  421. """
  422. Join lists contained as elements in array, a la str.join
  423. Parameters
  424. ----------
  425. sep : string
  426. Delimiter
  427. Returns
  428. -------
  429. joined : array
  430. """
  431. return _na_map(sep.join, arr)
  432. def str_len(arr):
  433. """
  434. Compute length of each string in array.
  435. Returns
  436. -------
  437. lengths : array
  438. """
  439. return _na_map(len, arr, dtype=int)
  440. def str_findall(arr, pat, flags=0):
  441. """
  442. Find all occurrences of pattern or regular expression
  443. Parameters
  444. ----------
  445. pat : string
  446. Pattern or regular expression
  447. flags : int, default 0 (no flags)
  448. re module flags, e.g. re.IGNORECASE
  449. Returns
  450. -------
  451. matches : array
  452. """
  453. regex = re.compile(pat, flags=flags)
  454. return _na_map(regex.findall, arr)
  455. def str_pad(arr, width, side='left'):
  456. """
  457. Pad strings with whitespace
  458. Parameters
  459. ----------
  460. arr : list or array-like
  461. width : int
  462. Minimum width of resulting string; additional characters will be filled
  463. with spaces
  464. side : {'left', 'right', 'both'}, default 'left'
  465. Returns
  466. -------
  467. padded : array
  468. """
  469. if side == 'left':
  470. f = lambda x: x.rjust(width)
  471. elif side == 'right':
  472. f = lambda x: x.ljust(width)
  473. elif side == 'both':
  474. f = lambda x: x.center(width)
  475. else: # pragma: no cover
  476. raise ValueError('Invalid side')
  477. return _na_map(f, arr)
  478. def str_center(arr, width):
  479. """
  480. "Center" strings, filling left and right side with additional whitespace
  481. Parameters
  482. ----------
  483. width : int
  484. Minimum width of resulting string; additional characters will be filled
  485. with spaces
  486. Returns
  487. -------
  488. centered : array
  489. """
  490. return str_pad(arr, width, side='both')
  491. def str_split(arr, pat=None, n=None):
  492. """
  493. Split each string (a la re.split) in array by given pattern, propagating NA
  494. values
  495. Parameters
  496. ----------
  497. pat : string, default None
  498. String or regular expression to split on. If None, splits on whitespace
  499. n : int, default None (all)
  500. Notes
  501. -----
  502. Both 0 and -1 will be interpreted as return all splits
  503. Returns
  504. -------
  505. split : array
  506. """
  507. if pat is None:
  508. if n is None or n == 0:
  509. n = -1
  510. f = lambda x: x.split(pat, n)
  511. else:
  512. if len(pat) == 1:
  513. if n is None or n == 0:
  514. n = -1
  515. f = lambda x: x.split(pat, n)
  516. else:
  517. if n is None or n == -1:
  518. n = 0
  519. regex = re.compile(pat)
  520. f = lambda x: regex.split(x, maxsplit=n)
  521. return _na_map(f, arr)
  522. def str_slice(arr, start=None, stop=None, step=1):
  523. """
  524. Slice substrings from each element in array
  525. Parameters
  526. ----------
  527. start : int or None
  528. stop : int or None
  529. Returns
  530. -------
  531. sliced : array
  532. """
  533. obj = slice(start, stop, step)
  534. f = lambda x: x[obj]
  535. return _na_map(f, arr)
  536. def str_slice_replace(arr, start=None, stop=None, repl=None):
  537. """
  538. Parameters
  539. ----------
  540. Returns
  541. -------
  542. replaced : array
  543. """
  544. raise NotImplementedError
  545. def str_strip(arr, to_strip=None):
  546. """
  547. Strip whitespace (including newlines) from each string in the array
  548. Parameters
  549. ----------
  550. to_strip : str or unicode
  551. Returns
  552. -------
  553. stripped : array
  554. """
  555. return _na_map(lambda x: x.strip(to_strip), arr)
  556. def str_lstrip(arr, to_strip=None):
  557. """
  558. Strip whitespace (including newlines) from left side of each string in the
  559. array
  560. Parameters
  561. ----------
  562. to_strip : str or unicode
  563. Returns
  564. -------
  565. stripped : array
  566. """
  567. return _na_map(lambda x: x.lstrip(to_strip), arr)
  568. def str_rstrip(arr, to_strip=None):
  569. """
  570. Strip whitespace (including newlines) from right side of each string in the
  571. array
  572. Parameters
  573. ----------
  574. to_strip : str or unicode
  575. Returns
  576. -------
  577. stripped : array
  578. """
  579. return _na_map(lambda x: x.rstrip(to_strip), arr)
  580. def str_wrap(arr, width, **kwargs):
  581. """
  582. Wrap long strings to be formatted in paragraphs
  583. Parameters
  584. ----------
  585. Same keyword parameters and defaults as :class:`textwrap.TextWrapper`
  586. width : int
  587. Maximum line-width
  588. expand_tabs : bool, optional
  589. If true, tab characters will be expanded to spaces (default: True)
  590. replace_whitespace : bool, optional
  591. If true, each whitespace character (as defined by string.whitespace) remaining
  592. after tab expansion will be replaced by a single space (default: True)
  593. drop_whitespace : bool, optional
  594. If true, whitespace that, after wrapping, happens to end up at the beginning
  595. or end of a line is dropped (default: True)
  596. break_long_words : bool, optional
  597. If true, then words longer than width will be broken in order to ensure that
  598. no lines are longer than width. If it is false, long words will not be broken,
  599. and some lines may be longer than width. (default: True)
  600. break_on_hyphens : bool, optional
  601. If true, wrapping will occur preferably on whitespace and right after hyphens
  602. in compound words, as it is customary in English. If false, only whitespaces
  603. will be considered as potentially good places for line breaks, but you need
  604. to set break_long_words to false if you want truly insecable words.
  605. (default: True)
  606. Returns
  607. -------
  608. wrapped : array
  609. Notes
  610. -----
  611. Internally, this method uses a :class:`textwrap.TextWrapper` instance with default
  612. settings. To achieve behavior matching R's stringr library str_wrap function, use
  613. the arguments:
  614. expand_tabs = False
  615. replace_whitespace = True
  616. drop_whitespace = True
  617. break_long_words = False
  618. break_on_hyphens = False
  619. Examples
  620. --------
  621. >>> s = pd.Series(['line to be wrapped', 'another line to be wrapped'])
  622. >>> s.str.wrap(12)
  623. 0 line to be\nwrapped
  624. 1 another line\nto be\nwrapped
  625. """
  626. kwargs['width'] = width
  627. tw = textwrap.TextWrapper(**kwargs)
  628. return _na_map(lambda s: '\n'.join(tw.wrap(s)), arr)
  629. def str_get(arr, i):
  630. """
  631. Extract element from lists, tuples, or strings in each element in the array
  632. Parameters
  633. ----------
  634. i : int
  635. Integer index (location)
  636. Returns
  637. -------
  638. items : array
  639. """
  640. f = lambda x: x[i] if len(x) > i else np.nan
  641. return _na_map(f, arr)
  642. def str_decode(arr, encoding, errors="strict"):
  643. """
  644. Decode character string to unicode using indicated encoding
  645. Parameters
  646. ----------
  647. encoding : string
  648. errors : string
  649. Returns
  650. -------
  651. decoded : array
  652. """
  653. f = lambda x: x.decode(encoding, errors)
  654. return _na_map(f, arr)
  655. def str_encode(arr, encoding, errors="strict"):
  656. """
  657. Encode character string to some other encoding using indicated encoding
  658. Parameters
  659. ----------
  660. encoding : string
  661. errors : string
  662. Returns
  663. -------
  664. encoded : array
  665. """
  666. f = lambda x: x.encode(encoding, errors)
  667. return _na_map(f, arr)
  668. def _noarg_wrapper(f):
  669. def wrapper(self):
  670. result = f(self.series)
  671. return self._wrap_result(result)
  672. wrapper.__name__ = f.__name__
  673. if f.__doc__:
  674. wrapper.__doc__ = f.__doc__
  675. return wrapper
  676. def _pat_wrapper(f, flags=False, na=False, **kwargs):
  677. def wrapper1(self, pat):
  678. result = f(self.series, pat)
  679. return self._wrap_result(result)
  680. def wrapper2(self, pat, flags=0, **kwargs):
  681. result = f(self.series, pat, flags=flags, **kwargs)
  682. return self._wrap_result(result)
  683. def wrapper3(self, pat, na=np.nan):
  684. result = f(self.series, pat, na=na)
  685. return self._wrap_result(result)
  686. wrapper = wrapper3 if na else wrapper2 if flags else wrapper1
  687. wrapper.__name__ = f.__name__
  688. if f.__doc__:
  689. wrapper.__doc__ = f.__doc__
  690. return wrapper
  691. def copy(source):
  692. "Copy a docstring from another source function (if present)"
  693. def do_copy(target):
  694. if source.__doc__:
  695. target.__doc__ = source.__doc__
  696. return target
  697. return do_copy
  698. class StringMethods(object):
  699. """
  700. Vectorized string functions for Series. NAs stay NA unless handled
  701. otherwise by a particular method. Patterned after Python's string methods,
  702. with some inspiration from R's stringr package.
  703. Examples
  704. --------
  705. >>> s.str.split('_')
  706. >>> s.str.replace('_', '')
  707. """
  708. def __init__(self, series):
  709. self.series = series
  710. def __getitem__(self, key):
  711. if isinstance(key, slice):
  712. return self.slice(start=key.start, stop=key.stop,
  713. step=key.step)
  714. else:
  715. return self.get(key)
  716. def __iter__(self):
  717. i = 0
  718. g = self.get(i)
  719. while g.notnull().any():
  720. yield g
  721. i += 1
  722. g = self.get(i)
  723. def _wrap_result(self, result):
  724. if not hasattr(result, 'ndim'):
  725. return result
  726. elif result.ndim == 1:
  727. name = getattr(result, 'name', None)
  728. return Series(result, index=self.series.index,
  729. name=name or self.series.name)
  730. else:
  731. assert result.ndim < 3
  732. return DataFrame(result, index=self.series.index)
  733. @copy(str_cat)
  734. def cat(self, others=None, sep=None, na_rep=None):
  735. result = str_cat(self.series, others=others, sep=sep, na_rep=na_rep)
  736. return self._wrap_result(result)
  737. @copy(str_split)
  738. def split(self, pat=None, n=-1):
  739. result = str_split(self.series, pat, n=n)
  740. return self._wrap_result(result)
  741. @copy(str_get)
  742. def get(self, i):
  743. result = str_get(self.series, i)
  744. return self._wrap_result(result)
  745. @copy(str_join)
  746. def join(self, sep):
  747. result = str_join(self.series, sep)
  748. return self._wrap_result(result)
  749. @copy(str_contains)
  750. def contains(self, pat, case=True, flags=0, na=np.nan, regex=True):
  751. result = str_contains(self.series, pat, case=case, flags=flags,
  752. na=na, regex=regex)
  753. return self._wrap_result(result)
  754. @copy(str_match)
  755. def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=False):
  756. result = str_match(self.series, pat, case=case, flags=flags,
  757. na=na, as_indexer=as_indexer)
  758. return self._wrap_result(result)
  759. @copy(str_replace)
  760. def replace(self, pat, repl, n=-1, case=True, flags=0):
  761. result = str_replace(self.series, pat, repl, n=n, case=case,
  762. flags=flags)
  763. return self._wrap_result(result)
  764. @copy(str_repeat)
  765. def repeat(self, repeats):
  766. result = str_repeat(self.series, repeats)
  767. return self._wrap_result(result)
  768. @copy(str_pad)
  769. def pad(self, width, side='left'):
  770. result = str_pad(self.series, width, side=side)
  771. return self._wrap_result(result)
  772. @copy(str_center)
  773. def center(self, width):
  774. result = str_center(self.series, width)
  775. return self._wrap_result(result)
  776. @copy(str_slice)
  777. def slice(self, start=None, stop=None, step=1):
  778. result = str_slice(self.series, start, stop)
  779. return self._wrap_result(result)
  780. @copy(str_slice)
  781. def slice_replace(self, i=None, j=None):
  782. raise NotImplementedError
  783. @copy(str_decode)
  784. def decode(self, encoding, errors="strict"):
  785. result = str_decode(self.series, encoding, errors)
  786. return self._wrap_result(result)
  787. @copy(str_encode)
  788. def encode(self, encoding, errors="strict"):
  789. result = str_encode(self.series, encoding, errors)
  790. return self._wrap_result(result)
  791. @copy(str_strip)
  792. def strip(self, to_strip=None):
  793. result = str_strip(self.series, to_strip)
  794. return self._wrap_result(result)
  795. @copy(str_lstrip)
  796. def lstrip(self, to_strip=None):
  797. result = str_lstrip(self.series, to_strip)
  798. return self._wrap_result(result)
  799. @copy(str_rstrip)
  800. def rstrip(self, to_strip=None):
  801. result = str_rstrip(self.series, to_strip)
  802. return self._wrap_result(result)
  803. @copy(str_wrap)
  804. def wrap(self, width, **kwargs):
  805. result = str_wrap(self.series, width, **kwargs)
  806. return self._wrap_result(result)
  807. @copy(str_get_dummies)
  808. def get_dummies(self, sep='|'):
  809. result = str_get_dummies(self.series, sep)
  810. return self._wrap_result(result)
  811. count = _pat_wrapper(str_count, flags=True)
  812. startswith = _pat_wrapper(str_startswith, na=True)
  813. endswith = _pat_wrapper(str_endswith, na=True)
  814. findall = _pat_wrapper(str_findall, flags=True)
  815. extract = _pat_wrapper(str_extract, flags=True)
  816. len = _noarg_wrapper(str_len)
  817. lower = _noarg_wrapper(str_lower)
  818. upper = _noarg_wrapper(str_upper)
  819. title = _noarg_wrapper(str_title)