__init__.py - Define and compile static regexes Define stat…

/plone/i18n/normalizer/__init__.py

https://github.com/plone/plone.i18n · Python · 215 lines · 109 code · 40 blank · 66 comment · 25 complexity · 83d573684e6c2eb759813d2271ffb27c MD5 · raw file

from .base import baseNormalize
from .interfaces import IFileNameNormalizer
from .interfaces import IIDNormalizer
from .interfaces import IURLNormalizer
from zope.component import queryUtility
from zope.interface import implementer

import re


# Define and compile static regexes
# Splits "<base>.<ext>": group 1 is everything before the last dot, group 2
# an extension of zero to four word characters.
FILENAME_REGEX = re.compile(r"^(.+)\.(\w{,4})$")
# Single and double quotes; the normalizers drop them without a replacement.
IGNORE_REGEX = re.compile(r"['\"]")
# Any run of non-word characters and/or dashes.
NON_WORD_REGEX = re.compile(r"[\W\-]+")
# Runs of punctuation that the file name normalizer replaces with a dash.
DANGEROUS_CHARS_REGEX = re.compile(r"[!$%&()*+,/:;<=>?@\\^{|}\[\]~`]+")
# Same character set as DANGEROUS_CHARS_REGEX plus "#"; used for URLs.
URL_DANGEROUS_CHARS_REGEX = re.compile(r"[!#$%&()*+,/:;<=>?@\\^{|}\[\]~`]+")
# One or more consecutive dashes.
MULTIPLE_DASHES_REGEX = re.compile(r"\-+")
# Dashes at the very start or the very end of the string.
EXTRA_DASHES_REGEX = re.compile(r"(^\-+)|(\-+$)")
# Leading underscores (group 1) followed by the rest of the string (group 2).
UNDERSCORE_START_REGEX = re.compile(r"(^_+)(.*)$")
# Separator between the parts of a locale name such as 'pt-BR' or 'pt_BR'.
LOCALE_SPLIT_REGEX = re.compile(r"[_-]")
# Define static constraints
# Default maximum lengths used by the ID, file name and URL normalizers.
MAX_LENGTH = 50
MAX_FILENAME_LENGTH = 1023
MAX_URL_LENGTH = 255


def cropName(base, maxLength=MAX_LENGTH):
    """
    Shorten ``base`` so that it is at most ``maxLength`` characters long.

    The string is preferably cut at a dash, so that dash-separated words are
    dropped as a whole; the dash used as the cut point is removed as well.
    If no dash is available as a cut point (there is none within the limit,
    or the only one is the first character), the string is cut hard at
    ``maxLength``. Strings that already fit are returned unchanged. A
    negative ``maxLength`` is treated as 0.
    """
    # A negative limit would make rfind() below interpret its end index
    # relative to the end of the string and could loop forever.
    maxLength = max(maxLength, 0)

    baseLength = len(base)
    if baseLength <= maxLength:
        return base

    # Walk backwards from dash to dash until the cut point fits the limit;
    # rfind() yields -1 once there are no more dashes to the left.
    index = baseLength
    while index > maxLength:
        index = base.rfind("-", 0, index)

    if index > 0:
        return base[:index]

    # index is -1 (no dash) or 0 (leading dash only): cutting at the dash
    # is impossible or would leave nothing, so truncate hard instead.
    return base[:maxLength]


@implementer(IIDNormalizer)
class IDNormalizer:
    """
    Normalizer that turns any unicode string into a version consisting only
    of ASCII characters allowed in a typical scripting or programming
    language id, for example a CSS class name or a Python variable name.

    Check that this implementation really fulfills the declared API.

      >>> from zope.interface.verify import verifyClass
      >>> verifyClass(IIDNormalizer, IDNormalizer)
      True
    """

    def normalize(self, text, locale=None, max_length=MAX_LENGTH):
        """
        Return the normalized form of ``text``.

        ``text`` has to be a unicode string. ``locale`` should be a normal
        locale such as 'pt-BR', 'sr@Latn' or 'de'; when given, a normalizer
        registered for that locale (or for its base language) pre-processes
        the text. The result is lowercased, reduced to dash-separated words
        and finally handed to ``cropName`` together with ``max_length``.
        """
        if locale is not None:
            # First choice: a normalizer registered for the exact locale
            delegate = queryUtility(IIDNormalizer, name=locale)
            subtags = LOCALE_SPLIT_REGEX.split(locale)
            if delegate is None and len(subtags) >= 2:
                # Nothing registered for the language/country combination,
                # so retry with the base language only
                delegate = queryUtility(IIDNormalizer, name=subtags[0])
            # Be defensive: delegating to a utility of our own class would
            # recurse until "RuntimeError: maximum recursion depth exceeded"
            # (ticket #11630)
            if not (delegate is None or delegate.__class__ is self.__class__):
                text = delegate.normalize(text, locale=locale)

        normalized = baseNormalize(text).lower()

        # Drop quotes, turn every non-word run into a dash, collapse
        # repeated dashes and trim dashes at both ends -- in that order
        substitutions = (
            (IGNORE_REGEX, ""),
            (NON_WORD_REGEX, "-"),
            (MULTIPLE_DASHES_REGEX, "-"),
            (EXTRA_DASHES_REGEX, ""),
        )
        for pattern, replacement in substitutions:
            normalized = pattern.sub(replacement, normalized)

        return cropName(normalized, maxLength=max_length)


@implementer(IFileNameNormalizer)
class FileNameNormalizer:
    """
    Normalizer that turns any unicode string into a version consisting only
    of ASCII characters allowed in a file name.

    Check that this implementation really fulfills the declared API.

      >>> from zope.interface.verify import verifyClass
      >>> verifyClass(IFileNameNormalizer, FileNameNormalizer)
      True
    """

    def normalize(self, text, locale=None, max_length=MAX_FILENAME_LENGTH):
        """
        Return the normalized form of ``text``.

        ``text`` has to be a unicode string. ``locale`` should be a normal
        locale such as 'pt-BR', 'sr@Latn' or 'de'; when given, a normalizer
        registered for that locale (or for its base language) pre-processes
        the text. ``max_length`` is applied to the base name only; a file
        name extension, if any, is appended after cropping.
        """
        if locale is not None:
            # First choice: a normalizer registered for the exact locale
            delegate = queryUtility(IFileNameNormalizer, name=locale)
            subtags = LOCALE_SPLIT_REGEX.split(locale)
            if delegate is None and len(subtags) >= 2:
                # Nothing registered for the language/country combination,
                # so retry with the base language only
                delegate = queryUtility(IFileNameNormalizer, name=subtags[0])
            # Be defensive: delegating to a utility of our own class would
            # recurse until "RuntimeError: maximum recursion depth exceeded"
            # (ticket #11630)
            if not (delegate is None or delegate.__class__ is self.__class__):
                text = delegate.normalize(text, locale=locale)

        text = baseNormalize(text)

        # Strip any leading underscores
        leading = UNDERSCORE_START_REGEX.match(text)
        if leading is not None:
            text = leading.group(2)

        # Preserve the filename extension: set it aside so that only the
        # base name is cleaned up and cropped
        split = FILENAME_REGEX.match(text)
        if split is None:
            stem, suffix = text, ""
        else:
            stem, suffix = split.groups()

        # Drop quotes, replace dangerous characters by a dash, trim dashes
        # at both ends and collapse repeated dashes -- in that order
        substitutions = (
            (IGNORE_REGEX, ""),
            (DANGEROUS_CHARS_REGEX, "-"),
            (EXTRA_DASHES_REGEX, ""),
            (MULTIPLE_DASHES_REGEX, "-"),
        )
        for pattern, replacement in substitutions:
            stem = pattern.sub(replacement, stem)

        stem = cropName(stem, maxLength=max_length)

        if suffix:
            return ".".join((stem, suffix))
        return stem


@implementer(IURLNormalizer)
class URLNormalizer:
    """
    Normalizer that turns any unicode string into a URL-safe version
    consisting only of ASCII characters allowed in a URL.

    Check that this implementation really fulfills the declared API.

      >>> from zope.interface.verify import verifyClass
      >>> verifyClass(IURLNormalizer, URLNormalizer)
      True
    """

    def normalize(self, text, locale=None, max_length=MAX_URL_LENGTH):
        """
        Return the normalized form of ``text``.

        ``text`` has to be a unicode string. ``locale`` should be a normal
        locale such as 'pt-BR', 'sr@Latn' or 'de'; when given, a normalizer
        registered for that locale (or for its base language) pre-processes
        the text. The result is lowercased; ``max_length`` is applied to
        the part before a file-name-like extension, which is appended again
        after cropping.
        """
        if locale is not None:
            # First choice: a normalizer registered for the exact locale
            delegate = queryUtility(IURLNormalizer, name=locale)
            subtags = LOCALE_SPLIT_REGEX.split(locale)
            if delegate is None and len(subtags) >= 2:
                # Nothing registered for the language/country combination,
                # so retry with the base language only
                delegate = queryUtility(IURLNormalizer, name=subtags[0])
            # Be defensive: delegating to a utility of our own class would
            # recurse until "RuntimeError: maximum recursion depth exceeded"
            # (ticket #11630)
            if not (delegate is None or delegate.__class__ is self.__class__):
                text = delegate.normalize(text, locale=locale)

        text = baseNormalize(text)

        # Strip any leading underscores
        leading = UNDERSCORE_START_REGEX.match(text)
        if leading is not None:
            text = leading.group(2)

        # Lowercase first, then set a trailing extension aside so that only
        # the part in front of it is cleaned up and cropped
        lowered = text.lower()
        split = FILENAME_REGEX.match(lowered)
        if split is None:
            stem, suffix = lowered, ""
        else:
            stem, suffix = split.groups()

        # Drop quotes, turn non-word runs and dangerous characters into a
        # dash, trim dashes at both ends and collapse repeated dashes -- in
        # that order
        substitutions = (
            (IGNORE_REGEX, ""),
            (NON_WORD_REGEX, "-"),
            (URL_DANGEROUS_CHARS_REGEX, "-"),
            (EXTRA_DASHES_REGEX, ""),
            (MULTIPLE_DASHES_REGEX, "-"),
        )
        for pattern, replacement in substitutions:
            stem = pattern.sub(replacement, stem)

        stem = cropName(stem, maxLength=max_length)

        if suffix:
            return ".".join((stem, suffix))
        return stem


# Module-level instances, one per normalizer class of this module.
idnormalizer = IDNormalizer()
filenamenormalizer = FileNameNormalizer()
urlnormalizer = URLNormalizer()
Tech Fingerprint

Standard Library: String & Text
Alerts (7)

'def' Ensure functions have docstrings for documentation
27
Complexity hotspot; lines 60 to 61 (total complexity: 3)
60 61
Complexity hotspot; lines 105 to 106 (total complexity: 3)
105 106
Complexity hotspot; lines 166 to 167 (total complexity: 3)
166 167
/plone/i18n/normalizer/__init__.py

/plone/i18n/normalizer/__init__.py