/lib/pwiki/StringOps.py
Python | 2087 lines | 1916 code | 56 blank | 115 comment | 17 complexity | 11bb72f8177a3572d741e7ddc2ef3916 MD5 | raw file
Possible License(s): LGPL-2.1
Large files files are truncated, but you can click here to view the full file
- ## -*- coding: ISO-8859-1 -*-
-
- """
- Various string operations, like unicode encoding/decoding,
- creating diff information for plain byte sequences
- """
-
- import os, traceback
-
- from struct import pack, unpack
-
- import difflib, codecs, os.path, random, base64, locale, hashlib, tempfile, math
-
- # import urllib_red as urllib
- import urllib, urlparse, cgi
-
- from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE
-
- import wx
-
- import re as _re # import pwiki.srePersistent as reimport pwiki.srePersistent as _re
- from WikiExceptions import *
-
- from Utilities import between
-
-
- LINEEND_SPLIT_RE = _re.compile(r"\r\n?|\n", _re.UNICODE)
-
- from SystemInfo import isUnicode, isOSX, isLinux, isWindows, isWin9x
-
-
- # To generate dependencies for py2exe/py2app
- import encodings.utf_8, encodings.latin_1, encodings.utf_16, \
- encodings.utf_16_be, encodings.utf_16_le, encodings.ascii
-
-
-
- # ---------- Encoding conversion ----------
-
-
- utf8Enc = codecs.getencoder("utf-8")
- utf8Dec = codecs.getdecoder("utf-8")
- utf8Reader = codecs.getreader("utf-8")
- utf8Writer = codecs.getwriter("utf-8")
-
def convertLineEndings(text, newLe):
    """
    Replace every line ending in text by newLe which should be
    "\n", "\r" or "\r\n". If newLe or text is unicode, the result
    will be unicode, too.
    """
    pieces = LINEEND_SPLIT_RE.split(text)
    return newLe.join(pieces)
-
def lineendToInternal(text):
    """Normalize all line endings of text to the internal "\n" form."""
    return convertLineEndings(text, "\n")
-
-
-
# Select the platform's multi-byte codec ("mbcs") and the OS line-ending
# convention.  _mbcsEnc/_mbcsDec are the raw codec functions used by
# mbcsEnc()/mbcsDec() below.
if isOSX():
    # generate dependencies for py2app
    import encodings.mac_roman
    _mbcsEnc = codecs.getencoder("mac_roman")
    _mbcsDec = codecs.getdecoder("mac_roman")
    mbcsReader = codecs.getreader("mac_roman")
    mbcsWriter = codecs.getwriter("mac_roman")

    def lineendToOs(text):
        # Classic Mac convention
        return convertLineEndings(text, "\r")

elif isLinux():
    # Could be wrong encoding
    # LINUX_ENCODING = "latin-1"
    # LINUX_ENCODING = "utf8"
    LINUX_ENCODING = locale.getpreferredencoding()

    # Fall back to UTF-8 if the locale does not report an encoding
    if not LINUX_ENCODING:
        LINUX_ENCODING = "utf8"

    _mbcsEnc = codecs.getencoder(LINUX_ENCODING)
    _mbcsDec = codecs.getdecoder(LINUX_ENCODING)
    mbcsReader = codecs.getreader(LINUX_ENCODING)
    mbcsWriter = codecs.getwriter(LINUX_ENCODING)

    def lineendToOs(text):
        return convertLineEndings(text, "\n")

else:
    # Windows (and anything else): use the "mbcs" codec which maps to the
    # current ANSI code page
    # generate dependencies for py2exe
    import encodings.ascii
    import encodings.mbcs
    _mbcsEnc = codecs.getencoder("mbcs")
    _mbcsDec = codecs.getdecoder("mbcs")
    mbcsReader = codecs.getreader("mbcs")
    mbcsWriter = codecs.getwriter("mbcs")

    def lineendToOs(text):
        return convertLineEndings(text, "\r\n")
-
-
def mbcsEnc(input, errors="strict"):
    # Encode unicode to the OS multi-byte charset.  Byte strings (Python 2
    # str) are assumed to be encoded already and pass through unchanged;
    # the (result, length) tuple mimics the codec API.
    if isinstance(input, str):
        return input, len(input)
    else:
        return _mbcsEnc(input, errors)
-
-
def mbcsDec(input, errors="strict"):
    # Decode bytes from the OS multi-byte charset.  Unicode input is
    # already decoded and passes through unchanged (codec-style tuple).
    if isinstance(input, unicode):
        return input, len(input)
    else:
        return _mbcsDec(input, errors)
-
-
-
# pathEnc/pathDec convert between unicode and the representation the
# filesystem expects for path names.
if os.path.supports_unicode_filenames:
    # Filesystem accepts unicode directly -> identity
    def dummy(s):
        return s

    pathEnc = dummy
    pathDec = dummy
else:
    # Filesystem needs byte strings -> go through the OS charset
    def pathEnc(s):
        if s is None:
            return None
        return mbcsEnc(s, "replace")[0]

    def pathDec(s):
        if s is None:
            return None
        return mbcsDec(s, "replace")[0]
-
-
if isWindows():
    if not os.path.supports_unicode_filenames:
        raise InternalError("This Python version does not support unicode paths")

    # To process pathes longer than 255 characters, Windows (NT and following)
    # expects an absolute path prefixed with \\?\

    def longPathEnc(s):
        if s is None:
            return None
        # Already prefixed (or a UNC path) -> leave untouched
        # if s.startswith("\\\\?\\"):
        if s.startswith("\\\\"):
            return s

        return u"\\\\?\\" + os.path.abspath(s)

    def longPathDec(s):
        if s is None:
            return None
        # Strip the \\?\ prefix again
        if s.startswith("\\\\?\\"):
            return s[4:]

        return s

else:
    # Non-Windows: no special long-path handling needed
    longPathEnc = pathEnc
    longPathDec = pathDec
-
-
# Conversion between unicode and whatever string type the wx GUI build
# expects (unicode builds need no conversion).
if isUnicode():
    def uniToGui(text):
        """
        Convert unicode text to a format usable for wx GUI
        """
        return text  # Nothing to do

    def guiToUni(text):
        """
        Convert wx GUI string format to unicode
        """
        return text  # Nothing to do
else:
    def uniToGui(text):
        """
        Convert unicode text to a format usable for wx GUI
        """
        return mbcsEnc(text, "replace")[0]

    def guiToUni(text):
        """
        Convert wx GUI string format to unicode
        """
        return mbcsDec(text, "replace")[0]
-
-
- # TODO!
# TODO!
def unicodeToCompFilename(us):
    """
    Encode a unicode filename to a filename compatible to (hopefully)
    any filesystem encoding by converting unicode to '=xx' for
    characters up to 255 and '$xxxx' above. Each 'x' represents a hex
    character.
    """
    allowed = (u"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            u"abcdefghijklmnopqrstuvwxyz{}()+-_,.%")

    encoded = []
    for ch in us:
        code = ord(ch)
        if code > 255:
            encoded.append("$%04x" % code)
        elif ch in allowed:
            encoded.append(str(ch))
        else:
            encoded.append("=%02x" % code)

    return "".join(encoded)
-
-
- # def unicodeToAllCharFilename
-
def strWithNone(s):
    """Map None to the empty string, return anything else unchanged."""
    return "" if s is None else s
-
def uniWithNone(u):
    """Map None to the empty unicode string, return anything else unchanged."""
    return u"" if u is None else u
-
-
def strToBool(s, default=False):
    """
    Try to interpret string (or unicode) s as
    boolean, return default if string can't be
    interpreted
    """
    if s is None:
        return default

    # An integer literal means True iff it is nonzero
    try:
        return int(s) != 0
    except ValueError:
        pass

    lowered = s.lower()
    if lowered in (u"true", u"yes", u"on"):
        return True
    if lowered in (u"false", u"no", u"off"):
        return False

    return default
-
-
- # TODO More formats
# TODO More formats
def fileContentToUnicode(content):
    """
    Try to detect the text encoding of content
    and return converted unicode
    """
    # A leading byte-order mark decides the encoding; otherwise fall back
    # to the OS multi-byte charset.
    for bom, enc in ((BOM_UTF8, "utf-8"), (BOM_UTF16_BE, "utf-16-be"),
            (BOM_UTF16_LE, "utf-16-le")):
        if content.startswith(bom):
            return content[len(bom):].decode(enc, "replace")

    return mbcsDec(content, "replace")[0]
-
-
def contentToUnicode(content):
    """
    Try to detect the text encoding of content
    and return converted unicode
    """
    # Already unicode -> nothing to do
    if isinstance(content, unicode):
        return content

    # A leading byte-order mark decides the encoding unambiguously
    if content.startswith(BOM_UTF8):
        return content[len(BOM_UTF8):].decode("utf-8", "replace")
    elif content.startswith(BOM_UTF16_BE):
        return content[len(BOM_UTF16_BE):].decode("utf-16-be", "replace")
    elif content.startswith(BOM_UTF16_LE):
        return content[len(BOM_UTF16_LE):].decode("utf-16-le", "replace")
    else:
        # No BOM: try strict UTF-8 first, fall back to the OS charset
        try:
            return content.decode("utf-8", "strict")
        except UnicodeDecodeError:
            return mbcsDec(content, "replace")[0]
-
-
-
-
-
def loadEntireTxtFile(filename):
    """
    Read the whole file in text mode ("rU") and return its content.
    """
    txtFile = open(pathEnc(filename), "rU")
    try:
        return txtFile.read()
    finally:
        txtFile.close()
-
-
- # def writeEntireTxtFile(filename, content):
- # """
- # Write entire file (text mode).
- # content can either be a byte string or a tuple or list of byte strings
- # which are then written one by one to the file.
- # """
- # rf = open(pathEnc(filename), "w")
- # try:
- # if isinstance(content, tuple) or isinstance(content, list):
- # for c in content:
- # rf.write(c)
- # else:
- # rf.write(content)
- # return
- # finally:
- # rf.close()
-
-
- # def writeEntireFileFast(filename, content, textMode=False):
- # """
- # Fast write of bytestring content without temporary file and
- # error checking.
- # """
- # if textMode:
- # rf = open(pathEnc(filename), "w")
- # else:
- # rf = open(pathEnc(filename), "wb")
- #
- # try:
- # rf.write(content)
- # finally:
- # rf.close()
-
-
def loadEntireFile(filename, textMode=False):
    """
    Load entire file and return its content.

    textMode -- open in universal-newline text mode instead of binary
    """
    mode = "rU" if textMode else "rb"
    f = open(pathEnc(filename), mode)
    try:
        return f.read()
    finally:
        f.close()
-
-
-
def writeEntireFile(filename, content, textMode=False):
    """
    Write entire file.
    content can either be a bytestring or a tuple or list of bytestrings
    which are then written one by one to the file.
    If textMode is True, content can also be a unistring or sequence
    of them (no mixed bytestring/unistring sequences allowed!)
    which are then converted to UTF-8 and written to file with prefixed BOM
    for utf-8. In textMode, lineEndings are properly converted to the
    appropriate for the OS.
    """
    import TempFileSet

    # Write to a temp file in the target directory first, then replace the
    # destination, so a crash can't leave a half-written file.
    basePath = os.path.dirname(filename)
    if basePath == "":
        basePath = u"."

    suffix = os.path.splitext(filename)[1]

    tempPath = TempFileSet.createTempFile(content, suffix=suffix,
            path=basePath, textMode=textMode)

    if os.path.exists(filename):
        os.unlink(filename)

    os.rename(tempPath, filename)
-
-
-
def getFileSignatureBlock(filename, timeCoarsening=None):
    """
    Returns the file signature block for a given file. It is a bytestring
    containing size and modification date of the file and can be compared to a
    db-stored version to check for file changes outside of WikidPad.

    The timeCoarsening can be a number of seconds (or fractions thereof).
    The modification time is rounded UP to a number divisible by timeCoarsening.

    If a wiki is moved between file systems with different time granularity
    (e.g. NTFS uses 100ns, FAT uses 2s for mod. time) the file would be seen as
    dirty and cache data would be rebuild without need without coarsening.
    """
    statinfo = os.stat(pathEnc(filename))

    mtime = statinfo.st_mtime
    if timeCoarsening is not None and timeCoarsening > 0:
        # Round modification time up to the next multiple of timeCoarsening
        mtime = int(math.ceil(mtime / timeCoarsening)) * timeCoarsening

    return pack(">BQd", 0, statinfo.st_size, mtime)
-
-
-
-
def removeBracketsFilename(fn):
    """
    Strip one pair of enclosing square brackets (real brackets, not
    configurable) from the name part of a filename; the extension is kept.
    """
    name, ext = os.path.splitext(fn)
    if name.startswith(u"[") and name.endswith(u"]"):
        return name[1:-1] + ext
    return name + ext
-
-
def revStr(s):
    """
    Return reversed string
    """
    # reversed() avoids the list()/reverse()/join round-trip; joining with
    # u"" keeps the original behavior of always returning unicode.
    return u"".join(reversed(s))
-
def splitKeep(s, delim):
    """
    Similar to split, but keeps the delimiter as separate element, e.g.
    splitKeep("aaabaaabaa", "b") -> ["aaa", "b", "aaa", "b", "aa"]
    """
    result = []
    for i, piece in enumerate(s.split(delim)):
        if i > 0:
            result.append(delim)
        result.append(piece)
    return result
-
def splitIndentDeepness(text):
    """
    Return tuple (d, t) where d is deepness of indentation and t is text
    without the indentation.
    """
    stripped = text.lstrip()
    return (len(text) - len(stripped), stripped)
-
def splitIndent(text):
    """
    Return tuple (ind, t) where ind is a string of the indentation characters
    (normally spaces) and t is text without the indentation.
    """
    stripped = text.lstrip()
    cut = len(text) - len(stripped)
    return (text[:cut], stripped)
-
def measureIndent(indent):
    """Return the width of the indentation string (one column per character)."""
    return len(indent)
-
-
def findLineStart(text, pos):
    """
    Return index of the first character of the line containing pos.
    Also correct when no newline precedes pos (rfind yields -1 -> 0).
    """
    nlPos = text.rfind(u"\n", 0, pos)
    return nlPos + 1
-
-
def findLineEnd(text, pos):
    """
    Return index of the end of the line containing pos: the position of the
    next "\n" or len(text) if there is none.
    """
    nlPos = text.find(u"\n", pos)
    return len(text) if nlPos == -1 else nlPos
-
-
-
# Empty group 1 marks the start of the last word before the match limit
LASTWORDSTART_RE = _re.compile(r"(?:.*\W)?()\w", _re.UNICODE)
# Empty group 1 marks the end of the first word (or end of string)
FIRSTWORDEND_RE = _re.compile(r".*?()(?:\W|(?!.))", _re.UNICODE)
-
-
-
def getNearestWordStart(text, pos):
    """Return start index of the word at/around pos, or pos if there is none."""
    lineStart = findLineStart(text, pos)

    match = LASTWORDSTART_RE.match(text, lineStart, pos + 1)
    if match is None:
        return pos
    return match.start(1)
-
-
def getNearestWordEnd(text, pos):
    """Return end index of the word starting at/after pos, or pos if none."""
    match = FIRSTWORDEND_RE.match(text, pos)
    if match is None:
        return pos
    return match.start(1)
-
-
def styleSelection(text, start, afterEnd, startChars, endChars=None):
    """
    Called when selected text (between start and afterEnd)
    e.g. in editor should be styled with startChars and endChars
    text -- Whole text
    start -- Start position of selection
    afterEnd -- After end position of selection

    startChars -- Characters to place before selection
    endChars -- Characters to place after selection. If None, startChars
            is used for that, too

    Returns tuple (replacement, repStart, repAfterEnd, selStart, selAfterEnd) where

    replacement -- replacement text
    repStart -- Start of characters to delete in original text
    repAfterEnd -- After end of characters to delete
    selStart -- Recommended start of editor selection after replacement
            was done
    selAfterEnd -- Recommended after end of editor selection after replacement
    """
    if endChars is None:
        endChars = startChars

    # An empty selection is expanded to the word around the cursor
    if start == afterEnd:
        start = getNearestWordStart(text, start)
        afterEnd = getNearestWordEnd(text, start)

    replacement = startChars + text[start:afterEnd] + endChars

    if start == afterEnd:
        # Selection still empty: cursor should end up between the
        # style characters
        cursorPos = afterEnd + len(startChars)
    else:
        # Otherwise the cursor goes after the styled word
        cursorPos = afterEnd + len(startChars) + len(endChars)

    return (replacement, start, afterEnd, cursorPos, cursorPos)
-
-
-
def splitFill(text, delim, count, fill=u""):
    """
    Split text by delim into up to count pieces. If less
    pieces than count+1 are available, additional pieces are added containing
    fill.
    """
    pieces = text.split(delim, count)
    missing = (count + 1) - len(pieces)
    if missing > 0:
        pieces.extend([fill] * missing)
    return pieces
-
-
- # def splitUnifName(unifName):
- # """
- # Split a unified name path and return a list of components.
- # If a part of the path must contain a slash it is quoted as double slash.
- #
- # Some unified names shouldn't be processed by this function, especially
- # "wikipage/..." unifNames
- # """
- # result =
-
-
-
def matchWhole(reObj, s):
    """
    reObj -- Compiled regular expression
    s -- String to match

    Similar to reObj.match(s), but returns MatchObject only if the
    whole string s is covered by the match, returns None otherwise
    """
    mat = reObj.match(s)
    if mat is None or mat.end(0) < len(s):
        return None
    return mat
-
-
-
def obfuscateShortcut(shortcut):
    """
    Necessary to prevent wxPython from interpreting e.g. CTRL+LEFT in a menu
    item as being a shortcut. I haven't found a better way.
    Unused at the moment.

    A zero-width space is inserted before each character.
    """
    pieces = []
    for ch in shortcut:
        pieces.append(u"\u200B")
        pieces.append(ch)
    return u"".join(pieces)
-
-
-
## Copied from xml.sax.saxutils and modified to reduce dependencies
def escapeHtml(data):
    """
    Escape &, <, > and line breaks in a unicode string of data.
    """
    # must do ampersand first, otherwise the '&' of the entities inserted
    # below would be escaped again.  (The entity names had been corrupted
    # to plain characters, making the replaces no-ops.)
    return data.replace(u"&", u"&amp;").replace(u">", u"&gt;").\
            replace(u"<", u"&lt;").replace(u"\n", u"<br />\n")
-
-
def escapeHtmlNoBreaks(data):
    """
    Escape &, <, and > (no line breaks) in a unicode string of data.
    """
    # must do ampersand first, otherwise the '&' of the entities inserted
    # below would be escaped again.  (The entity names had been corrupted
    # to plain characters, making the replaces no-ops.)
    return data.replace(u"&", u"&amp;").replace(u">", u"&gt;").\
            replace(u"<", u"&lt;")
-
-
-
-
class AbstractHtmlItem:
    """
    Abstract base for some "things" appearing in HTML. This and derived classes
    mainly needed for the "htmlEquivalent" token in a wiki AST
    """
    def __init__(self):
        pass

    def asString(self):
        # Subclasses must return the HTML text representation
        raise NotImplementedError

    def clone(self):
        # Subclasses must return a copy of themselves
        raise NotImplementedError

    def __repr__(self):
        return "%s:%s" % (self.__class__.__name__, self.asString())
-
-
class HtmlStartTag(AbstractHtmlItem):
    """
    Regular start tag.  Attribute values are stored HTML-escaped; double
    quotes become &quot; because values are emitted inside double-quoted
    attributes.
    """
    def __init__(self, tag, attributes=None):
        self.tag = tag
        if attributes is None:
            self.attributes = {}
        else:
            # The &quot; entity had been corrupted to a bare '"' which
            # broke the string literal; restored here.
            self.attributes = dict((k, escapeHtml(v).replace(u"\"", u"&quot;"))
                    for k, v in attributes.iteritems())

    def addAttribute(self, key, value):
        # A None value means a boolean-style attribute: value = key
        if value is None:
            value = key

        self.attributes[key] = escapeHtml(value).replace(u"\"", u"&quot;")


    def addEscapedAttribute(self, key, value):
        # value must already be HTML-escaped by the caller
        if value is None:
            value = key

        self.attributes[key] = value


    def addEscapedAttributes(self, attrSeq):
        for key, value in attrSeq:
            self.addEscapedAttribute(key, value)


    def getTag(self):
        return self.tag

    def getStringForAttributes(self):
        # Values are escaped already, so they can be emitted verbatim
        return u" ".join(
                k + u"=\"" + v + u"\""
                for k, v in self.attributes.iteritems())

    def asString(self):
        if len(self.attributes) == 0:
            return u"<" + self.tag + u">"

        attrString = self.getStringForAttributes()
        return u"<" + self.tag + u" " + attrString + u">"


    def clone(self):
        # Copy the attribute dict directly; routing it through __init__
        # would HTML-escape the already escaped values a second time.
        result = HtmlStartTag(self.tag)
        result.attributes = dict(self.attributes)
        return result
-
-
class HtmlEmptyTag(HtmlStartTag):
    """
    Start tag which is also end tag (e.g. <br />)
    """

    def asString(self):
        if len(self.attributes) == 0:
            return u"<" + self.tag + u" />"

        attrString = self.getStringForAttributes()
        return u"<" + self.tag + u" " + attrString + u" />"

    def clone(self):
        # Copy the attribute dict directly; routing it through __init__
        # would HTML-escape the already escaped values a second time.
        result = HtmlEmptyTag(self.tag)
        result.attributes = dict(self.attributes)
        return result
-
-
class HtmlEndTag(AbstractHtmlItem):
    """
    Regular end tag, e.g. </div>
    """
    def __init__(self, tag):
        self.tag = tag

    def asString(self):
        return u"</%s>" % self.tag

    def clone(self):
        return HtmlEndTag(self.tag)
-
-
class HtmlEntity(AbstractHtmlItem):
    """
    Entity; stored normalized to the full "&name;" form.
    """
    def __init__(self, entity):
        # Add leading '&' and trailing ';' if they are missing
        if entity[0] != "&":
            entity = "&" + entity
        if entity[-1] != ";":
            entity = entity + ";"

        self.entity = entity

    def asString(self):
        return self.entity

    def clone(self):
        return HtmlEntity(self.entity)
-
-
-
def escapeForIni(text, toEscape=u""):
    """
    Return an escaped version of string. Always escaped will be backslash and
    all characters with ASCII value < 32. Additional characters can be given in
    the toEscape parameter (as unicode string, only characters < 128,
    not the backslash).

    Returns: unicode string
    """
    # Escape '\' first so the "\xnn" sequences inserted below are not
    # escaped again
    text = text.replace(u"\\", u"\\x%02x" % ord("\\"))

    # Escape everything with ord < 32
    for i in xrange(32):
        text = text.replace(unichr(i), u"\\x%02x" % i)

    # Escape the caller-supplied extra characters
    for c in toEscape:
        text = text.replace(c, u"\\x%02x" % ord(c))

    return text
-
-
def _unescapeForIniHelper(match):
    # Convert one "\xnn" escape (group 1 = two hex digits) back to its char
    return unichr(int(match.group(1), 16))
-
def unescapeForIni(text):
    """
    Inverse of escapeForIni()
    """
    # Replace every "\xnn" sequence by the character with hex code nn
    return _re.sub(ur"\\x([0-9a-f]{2})", _unescapeForIniHelper, text)
-
-
- # def escapeWithRe(text):
- # return text.replace(u"\\", u"\\\\").replace("\n", "\\n").\
- # replace("\r", "\\r")
-
def unescapeWithRe(text):
    """
    Unescape things like \n or \f. Throws exception if unescaping fails
    """
    # Trick: text is passed as the *replacement template* of re.sub.
    # Substituting the empty pattern once in an empty string makes the re
    # module expand the backslash escapes (\n, \t, ...) contained in text;
    # a malformed escape raises an error from the re module.
    return _re.sub(u"", text, u"", 1)
-
-
def re_sub_escape(pattern):
    """
    Escape the replacement pattern for a re.sub function
    """
    result = pattern.replace(u"\\", u"\\\\")
    for plain, escaped in ((u"\n", u"\\n"), (u"\r", u"\\r"),
            (u"\t", u"\\t"), (u"\f", u"\\f")):
        result = result.replace(plain, escaped)
    return result
-
-
# Matches HTML 3- or 6-digit color specs like "#4E2" or "#C0D623"
HTML_DIGITCOLOR = _re.compile(
        ur"^#[0-9a-fA-F]{3}(?:[0-9a-fA-F]{3})?$",
        _re.DOTALL | _re.UNICODE | _re.MULTILINE)
-
-
- # def htmlColorToRgbTuple(desc):
# def htmlColorToRgbTuple(desc):
def colorDescToRgbTuple(desc):
    """
    Converts a color description to an RGB tuple or None if
    description is invalid.
    Color description can be:
    HTML 6-digits color, e.g. #C0D623
    HTML 3-digits color, e.g. #4E2 which converts to #44EE22 (TODO: HTML standard?)
    HTML color name
    """
    if not HTML_DIGITCOLOR.match(desc):
        # Not a #-digit form -> try lookup as a color name
        try:
            desc = _COLORBASE[desc.replace(" ", "").lower()]
        except KeyError:
            return None

    if len(desc) == 4:
        # Expand #rgb to #rrggbb by doubling each digit
        desc = "#" + desc[1] + desc[1] + desc[2] + desc[2] + desc[3] + desc[3]
    try:
        r = int(desc[1:3], 16)
        g = int(desc[3:5], 16)
        b = int(desc[5:7], 16)
        return (r, g, b)
    except ValueError:
        # Catch only the parse error; the previous bare except would also
        # have swallowed e.g. KeyboardInterrupt
        return None
-
-
- # def colorDescToRgbTuple(desc):
- # """
- # Converts a color description to an RGB tuple or None if
- # description is invalid.
- # Color description can be:
- # HTML 6-digits color, e.g. #C0D623
- # HTML 3-digits color, e.g. #4E2 which converts to #44EE22 (TODO: HTML standard?)
- # HTML color name
- # """
- # desc = desc.strip()
- # if len(desc) == 0:
- # return None
- #
- # if desc[0] != "#":
- # desc = desc.replace(" ", "").lower()
- # desc = _COLORBASE.get(desc)
- # if desc is None:
- # return None
- #
- # if len(desc) == 4:
- # desc = "#" + desc[1] + desc[1] + desc[2] + desc[2] + desc[3] + desc[3]
- #
- # if len(desc) != 7:
- # return None
- # try:
- # r = int(desc[1:3], 16)
- # g = int(desc[3:5], 16)
- # b = int(desc[5:7], 16)
- # return (r, g, b)
- # except:
- # return None
-
-
def rgbToHtmlColor(r, g, b):
    """
    Return HTML color '#hhhhhh' format string.
    """
    return "#" + "".join("%02X" % component for component in (r, g, b))
-
-
def base64BlockEncode(data):
    """
    Cut a sequence of base64 characters into chunks of 70 characters
    and join them with newlines. Pythons base64 decoder can read this.
    """
    b64 = base64.b64encode(data)

    chunks = []
    pos = 0
    while pos < len(b64):
        chunks.append(b64[pos:pos + 70])
        pos += 70

    return u"\n".join(chunks)
-
-
- # Just for completeness
- base64BlockDecode = base64.b64decode
-
-
-
# Tokenizes a strftime-like format string; "%u" (ISO weekday) is kept as
# its own token so formatWxDate() can expand it manually.
EXTENDED_STRFTIME_RE = _re.compile(
        r"([^%]+|%(?:%|[%aAbBcdHIJmMpSUwWxXyYZ])|(?:%u))",
        _re.DOTALL | _re.UNICODE | _re.MULTILINE)
-
-
def formatWxDate(frmStr, date):
    """
    Format a date (wxDateTime) according to frmStr similar to strftime.
    """
    if frmStr == "":
        return frmStr

    processed = []

    for token in EXTENDED_STRFTIME_RE.split(frmStr):
        if not token:
            continue
        if token == "%u":
            # ISO-8601 weekday number: Monday=1 ... Sunday=7
            weekday = date.GetWeekDay()
            if weekday == 0:
                weekday = 7
            processed.append("%i" % weekday)
        else:
            processed.append(token)

    return date.Format(unescapeWithRe("".join(processed)))
-
-
def strftimeUB(frmStr, timet=None):
    """
    Similar to time.strftime, but uses a time_t number as time (no structure),
    also unescapes some backslash codes, supports unicode and shows local time
    if timet is GMT.
    """
    if timet is None:
        # No timestamp given -> use current time
        return formatWxDate(frmStr, wx.DateTime_Now())

    try:
        return formatWxDate(frmStr, wx.DateTimeFromTimeT(timet))
    except TypeError:
        return _(u"Inval. timestamp")  # TODO Better errorhandling?
-
-
-
def splitpath(path):
    """
    Cut a path into all of its pieces, starting with drive name, through
    all path components up to the name of the file (if any).
    Returns a list of the elements, first and/or last element may be
    empty strings.
    Maybe use os.path.abspath before calling it
    """
    drive, remainder = os.path.splitdrive(path)
    parts = []
    while True:
        head, tail = os.path.split(remainder)
        if head == remainder:
            # os.path.split makes no more progress -> done
            break
        parts.append(tail)
        remainder = head
    parts.append(drive)
    parts.reverse()
    return parts
-
-
def getRelativeFilePathAndTestContained(location, toFilePath):
    """
    Returns a relative (if possible) path to address the file
    toFilePath if you are in directory location as first tuple item.


    Function returns None as first tuple item if an absolute path is needed!

    Tests if toFilePath is a file or dir contained in location and returns
    truth value in second tuple item

    Both parameters should be normalized with os.path.abspath
    location -- Directory where you are
    toFilePath -- absolute path to file you want to reach
    """
    locParts = splitpath(location)
    # Drop a trailing empty component (location ending in a separator)
    if locParts[-1] == "":
        del locParts[-1]

    locLen = len(locParts)
    fileParts = splitpath(toFilePath)

    # Strip the common prefix of both paths (case-insensitively where the
    # OS compares paths that way)
    for i in xrange(len(locParts)):
        if len(fileParts) == 0:
            break  # TODO Error ???

        if os.path.normcase(locParts[0]) != os.path.normcase(fileParts[0]):
            break

        del locParts[0]
        del fileParts[0]

    result = []

    if len(locParts) == locLen:
        # Nothing matches at all, absolute path needed
        return None, False

    isContained = len(fileParts) > 0
    if len(locParts) > 0:
        # go back some steps
        result += [".."] * len(locParts)
        isContained = False

    result += fileParts

    if len(result) == 0:
        # Same directory
        return u"", False
    else:
        return os.path.join(*result), isContained
-
-
-
def relativeFilePath(location, toFilePath):
    """
    Returns a relative (if possible) path to address the file
    toFilePath if you are in directory location.
    Both parameters should be normalized with os.path.abspath

    Function returns None if an absolute path is needed!

    location -- Directory where you are
    toFilePath -- absolute path to file you want to reach
    """
    relPath, dummy = getRelativeFilePathAndTestContained(location, toFilePath)
    return relPath
-
-
def testContainedInDir(location, toFilePath):
    """
    Tests if toFilePath is a file or dir contained in location.
    Both parameters should be normalized with os.path.abspath
    """
    dummy, contained = getRelativeFilePathAndTestContained(location, toFilePath)
    return contained
-
-
-
-
def _asciiFlexibleUrlUnquote(part):
    """
    Unquote ascii-only parts of an url
    """
    if len(part) == 0:
        return u""
    # Get bytes out of percent-quoted URL
    linkBytes = urllib.unquote(part)
    # Try to interpret bytes as UTF-8
    try:
        return linkBytes.decode("utf8", "strict")
    except UnicodeDecodeError:
        # Failed -> try mbcs
        try:
            return mbcsDec(linkBytes, "strict")[0]
        except UnicodeDecodeError:
            # Failed, too -> leave link part unmodified. TODO: Doesn't make sense, will fail as well.
            return unicode(part)
-
-
def flexibleUrlUnquote(link):
    """
    Tries to unquote an url.
    TODO: Faster and more elegantly.

    link -- unistring
    """
    if link is None:
        return None

    # Walk the link and alternate between runs of pure-ASCII characters
    # (which may contain %xx quoting and are unquoted) and runs of
    # non-ASCII characters (which are passed through unchanged).
    i = 0
    result = SnippetCollector()

    while i < len(link):

        asciiPart = ""
        while i < len(link) and ord(link[i]) < 128:
            asciiPart += chr(ord(link[i]))
            i += 1

        result += _asciiFlexibleUrlUnquote(asciiPart)

        unicodePart = u""
        while i < len(link) and ord(link[i]) >= 128:
            unicodePart += link[i]
            i += 1

        result += unicodePart

    return unicode(result.value())
-
-
-
# Characters urlQuote() always percent-quotes: RFC 2396 reserved characters
# plus characters considered unsafe in URLs
URL_RESERVED = frozenset((u";", u"?", u":", u"@", u"&", u"=", u"+", u",", u"/",
        u"{", u"}", u"|", u"\\", u"^", u"~", u"[", u"]", u"`", u'"', u"%"))
-
-
-
def urlQuote(s, safe='/'):
    """
    Modified version of urllib.quote supporting unicode.

    Percent-quotes every character that is either a control/space character
    (ord < 33) or a member of URL_RESERVED, unless it is listed in safe.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters:

    reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
               "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.  The function is intended for
    quoting the path section of a URL, thus '/' is safe by default.

    The characters u"{", u"}", u"|", u"\", u"^", u"~", u"[", u"]", u"`"
    are considered unsafe and are quoted as well.
    """
    quoted = []

    for c in s:
        if c in safe:
            quoted.append(c)
        elif ord(c) < 33 or c in URL_RESERVED:
            quoted.append("%%%02X" % ord(c))
        else:
            quoted.append(c)

    return "".join(quoted)
-
-
-
def urlQuoteSpecific(s, toQuote=''):
    """
    Percent-quote exactly the characters listed in toQuote, leave all
    others untouched.
    """
    return "".join(
            ("%%%02X" % ord(c)) if c in toQuote else c
            for c in s)
-
-
-
def ntUrlFromPathname(p, addSafe=''):
    r"""
    Modified version of nturl2path.pathname2url.

    Convert a DOS/Windows path name to a file url.

            C:\foo\bar\spam.foo

    becomes

            ///C:/foo/bar/spam.foo
    """
    if not ':' in p:
        # No drive specifier, just convert slashes and quote the name
        # if p[:2] == '\\\\':
        #     # path is something like \\host\path\on\remote\host
        #     # convert this to ////host/path/on/remote/host
        #     # (notice doubling of slashes at the start of the path)
        #     p = '\\\\' + p
        components = p.split('\\')
        return urlQuote('/'.join(components), safe='/' + addSafe)
    comp = p.split(':')
    # Exactly one drive letter is allowed before the ':'
    if len(comp) != 2 or len(comp[0]) > 1:
        error = 'Bad path: ' + p
        raise IOError, error

    # Drive letter present: quote drive and each path component separately
    drive = urlQuote(comp[0].upper(), safe='/' + addSafe)
    components = comp[1].split('\\')
    path = '///' + drive + ':'
    for comp in components:
        if comp:
            path = path + '/' + urlQuote(comp, safe='/' + addSafe)
    return path
-
-
-
def _macpncomp2url(component, addSafe):
    # Mac path components are limited to 31 characters; slashes inside a
    # component get quoted because '/' is not in the safe set here.
    return urlQuote(component[:31], safe=addSafe)
-
def macUrlFromPathname(pathname, addSafe=''):
    """
    Modified version of macurl2path.pathname2url.

    convert mac pathname to /-delimited pathname
    """
    if '/' in pathname:
        raise RuntimeError, "Cannot convert pathname containing slashes"
    components = pathname.split(':')
    # Remove empty first and/or last component
    if components[0] == '':
        del components[0]
    if components[-1] == '':
        del components[-1]
    # Replace empty string ('::') by .. (will result in '/../' later)
    for i in range(len(components)):
        if components[i] == '':
            components[i] = '..'
    # Truncate names longer than 31 bytes
    components = [_macpncomp2url(c, addSafe) for c in components]
    # components = map(_macpncomp2url, components)

    # A leading colon marks a relative mac path -> relative URL
    if os.path.isabs(pathname):
        return '/' + '/'.join(components)
    else:
        return '/'.join(components)
-
-
# Select the platform-specific pathname -> URL conversion
if os.name == 'nt':
    urlFromPathname = ntUrlFromPathname
elif os.name == 'mac':
    urlFromPathname = macUrlFromPathname
else:
    def urlFromPathname(fn, addSafe=''):
        # Posix: encode unicode as UTF-8 bytes, then percent-quote
        if isinstance(fn, unicode):
            fn = utf8Enc(fn, "replace")[0]

        # riscos not supported
        url = urlQuote(fn, safe='/$' + addSafe)
        # url.replace("%24", "$")

        return url
-
-
-
-
def ntPathnameFromUrl(url, testFileType=True):
    r"""
    Modified version of nturl2path.url2pathname.

    Convert a URL to a DOS path.

            ///C|/foo/bar/spam.foo

    becomes

            C:\foo\bar\spam.foo

    testFileType -- ensure that URL has type "file" (and starts with "file:")
            throw RuntimeError if not.
    """
    import string
    if url.startswith("file:") or url.startswith("wiki:"):
        url = url[5:]
    elif testFileType:
        raise RuntimeError, 'Cannot convert non-local URL to pathname'

    # Strip fragment or query if present
    url, dummy = decomposeUrlQsFrag(url)

    if (not ':' in url) and (not '|' in url) and (not '%3A' in url) and (not '%3a' in url):
        # No drive specifier, just convert slashes
        if url[:4] == '////':
            # path is something like ////host/path/on/remote/host
            # convert this to \\host\path\on\remote\host
            # (notice halving of slashes at the start of the path)
            url = url[2:]
        components = url.split('/')
        # make sure not to convert quoted slashes :-)
        return flexibleUrlUnquote('\\'.join(components))

    # Try the possible spellings of the drive delimiter until one yields a
    # valid "<letter><delim><rest>" split
    comp = None
    for driveDelim in ('|', ':', '%3A', '%3a'):
        comp = url.split(driveDelim)
        if len(comp) != 2 or len(comp[0]) == 0 or comp[0][-1] not in string.ascii_letters:
            comp = None
            continue
        break

    if comp is None:
        error = 'Bad URL: ' + url
        raise IOError(error)


    # comp = url.split('|')
    # if len(comp) == 1:
    #     comp = url.split(':')
    #
    # if len(comp) != 2 or len(comp[0]) == 0 or comp[0][-1] not in string.ascii_letters:
    #     error = 'Bad URL: ' + url
    #     raise IOError, error

    drive = comp[0][-1].upper()
    components = comp[1].split('/')
    path = drive + ':'
    for comp in components:
        if comp:
            path = path + '\\' + flexibleUrlUnquote(comp)
    return path
-
-
-
def macPathnameFromUrl(url, testFileType=True):
    "Convert /-delimited url to mac pathname"
    #
    # XXXX The .. handling should be fixed...
    #
    tp = urllib.splittype(url)[0]
    if tp and tp != 'file' and tp != 'wiki':
        raise RuntimeError, 'Cannot convert non-local URL to pathname'
    # Turn starting /// into /, an empty hostname means current host
    if url[:3] == '///':
        url = url[2:]
    elif url[:2] == '//':
        raise RuntimeError, 'Cannot convert non-local URL to pathname'

    # Strip fragment or query if present
    url, dummy = decomposeUrlQsFrag(url)

    components = url.split('/')
    # Remove . and embedded ..
    i = 0
    while i < len(components):
        if components[i] == '.':
            del components[i]
        elif components[i] == '..' and i > 0 and \
                components[i-1] not in ('', '..'):
            # "a/.." cancels out -> remove both components
            del components[i-1:i+1]
            i = i-1
        elif components[i] == '' and i > 0 and components[i-1] != '':
            del components[i]
        else:
            i = i+1
    if not components[0]:
        # Absolute unix path, don't start with colon
        rv = ':'.join(components[1:])
    else:
        # relative unix path, start with colon. First replace
        # leading .. by empty strings (giving ::file)
        i = 0
        while i < len(components) and components[i] == '..':
            components[i] = ''
            i = i + 1
        rv = ':' + ':'.join(components)
    # and finally unquote slashes and other funny characters
    return flexibleUrlUnquote(rv)
-
-
def elsePathnameFromUrl(url, testFileType=True):
    """
    Convert a /-delimited "file:" or "wiki:" URL to a pathname on
    platforms other than Windows and classic Mac.

    testFileType -- if True, raise RuntimeError for URLs with any other
            scheme instead of passing them through.
    """
    #
    # XXXX The .. handling should be fixed...
    #
    if url[:8] in ("file:///", "wiki:///"):
        url = url[7:]  # Third '/' remains
    elif url[:5] in ("file:", "wiki:"):
        url = url[5:]
    elif testFileType:
        raise RuntimeError('Cannot convert non-local URL to pathname')

    # Discard any query string or fragment part
    mainPart, dummy = decomposeUrlQsFrag(url)

    return flexibleUrlUnquote(mainPart)
-
-
-
-
# Select the platform-specific URL -> pathname conversion function once
# at import time. os.name == 'mac' applies to classic (pre-OS X)
# MacPython only; OS X reports 'posix' and uses elsePathnameFromUrl.
if os.name == 'nt':
    pathnameFromUrl = ntPathnameFromUrl
elif os.name == 'mac':
    pathnameFromUrl = macPathnameFromUrl
else:
    # pathnameFromUrl = flexibleUrlUnquote
    pathnameFromUrl = elsePathnameFromUrl
-
-
-
- _DECOMPOSE_URL_RE = _re.compile(ur"([^?#]*)((?:[?#].*)?)", _re.UNICODE | _re.DOTALL);
-
-
def decomposeUrlQsFrag(url):
    """
    Split url at the first '?' or '#' (query string or fragment) so the
    parts can be (un-)quoted differently.

    Returns a 2-tuple (mainPart, additionalPart); additionalPart is
    empty if the URL contains no '?' or '#'.
    """
    matchObj = _DECOMPOSE_URL_RE.match(url)
    mainPart, additional = matchObj.groups()
    return (mainPart, additional)
-
-
def composeUrlQsFrag(mainUrl, additional):
    """
    Inverse of decomposeUrlQsFrag(): join the main URL part and the
    query-string/fragment part back into one URL. Currently a very
    simple function but may become more complex later.
    """
    result = mainUrl + additional
    return result
-
-
-
def _quoteChar(c):
    """
    Return the filename escape sequence for unicode character c:
    "%XX" for code points below 256, "@XXXX" for all others
    (X = one hex digit).
    """
    oc = ord(c)
    if oc >= 256:
        return u"@%04X" % oc
    return u"%%%02X" % oc
-
-
# Characters used by the quoting scheme itself; they must always be escaped
_ESCAPING_CHARACTERS = u"%@~"

# Characters never allowed in a generated filename
_FORBIDDEN_CHARACTERS = frozenset(u":/\\*?\"'<>|;![]" + _ESCAPING_CHARACTERS)
# Additional characters not allowed as the FIRST character
_FORBIDDEN_START = _FORBIDDEN_CHARACTERS | frozenset(u".$ -")

# Allowed ascii characters remaining: #&()+,=^_`{}
-
-
def iterCompatibleFilename(baseName, suffix, asciiOnly=False, maxLength=120,
        randomLength=10):
    """
    Generator to create filenames compatible to (hopefully) all major
    OSs/filesystems.

    Encode a unicode filename to a filename compatible to (hopefully)
    any filesystem encoding by converting unicode to '%xx' for
    characters below 256 and '@xxxx' above. Each 'x' represents a hex
    character.

    If the resulting name is too long it is shortened.

    If the first returned filename isn't accepted, a sequence of random
    characters, delimited by a tilde '~' is added. If the filename is then
    too long it is also shortened.

    The first random sequence isn't random but a MD5-hash of baseName.

    Each time you ask for next filename, a new sequence of random characters
    is created.

    baseName - Base name to use for the filename
    suffix - Suffix (must include the dot) of the filename. The suffix must not
        be empty, is not quoted in any way and should follow the
        rules of the filesystem(s)
    asciiOnly - Iff True, all non-ascii characters are replaced.
    maxLength - Maximum length of filename including encoded basename,
        random sequence and suffix
    randomLength - Length of the random sequence (without leading tilde)
    """
    # between() presumably clamps maxLength to
    # [20 + len(suffix) + randomLength, 250] -- confirm in Utilities
    maxLength = between(20 + len(suffix) + randomLength, maxLength, 250)

    # Decode from the system's multi-byte encoding if necessary
    baseName = mbcsDec(baseName)[0]

    if len(baseName) > 0:
        # First character uses the stricter _FORBIDDEN_START set
        c = baseName[0]
        if ord(c) < 32 or c in _FORBIDDEN_START or \
                (asciiOnly and ord(c) > 127):
            baseQuoted = [_quoteChar(c)]
        else:
            baseQuoted = [c]

        # Remaining characters use _FORBIDDEN_CHARACTERS
        for c in baseName[1:]:
            if ord(c) < 32 or c in _FORBIDDEN_CHARACTERS or \
                    (asciiOnly and ord(c) > 127):
                baseQuoted.append(_quoteChar(c))
            else:
                baseQuoted.append(c)

    else:
        baseQuoted = []

    overallLength = sum(len(bq) for bq in baseQuoted) + len(suffix)

    # Shorten baseQuoted if needed. This method ensures that no half-quoted
    # character (e.g. "@3") is remaining
    while overallLength > maxLength:
        overallLength -= len(baseQuoted.pop())

    if len(baseName) > 0:
        # First try, no random part
        yield u"".join(baseQuoted) + suffix

    # Add random part to length
    overallLength += 1 + randomLength

    # Shorten baseQuoted again
    while overallLength > maxLength:
        overallLength -= len(baseQuoted.pop())

    beforeRandom = u"".join(baseQuoted) + u"~"

    # Now we try MD5-Hash. This is one last try to create a filename which
    # is non-ambigously connected to the baseName
    hashStr = getMd5B36ByString(baseName)[-randomLength:]
    if len(hashStr) < randomLength:
        # Left-pad with zeros to the full random length
        hashStr = u"0" * (randomLength - len(hashStr)) + hashStr

    yield beforeRandom + hashStr + suffix

    # Now build infinite random names
    while True:
        yield beforeRandom + createRandomString(randomLength) + suffix
-
-
def _unquoteCharRepl(matchObj):
    """
    re.sub() replacement callback inverting _quoteChar(): decode a
    matched "%XX" or "@XXXX" escape back to the unicode character.
    """
    s = matchObj.group(0)

    if s[0] == "%":
        # Two hex digits follow '%'
        v = int(s[1:3], 16)
        return unichr(v)
    else: # s[0] == "@":
        # Four hex digits follow '@'
        v = int(s[1:5], 16)
        return unichr(v)
-
-
# Matches a single escape produced by _quoteChar(): "%XX" or "@XXXX"
_FILENAME_UNQUOTE_RE = _re.compile(ur"%[A-Fa-f0-9]{2}|@[A-Fa-f0-9]{4}",
        _re.UNICODE | _re.DOTALL | _re.MULTILINE)
-
-
def guessBaseNameByFilename(filename, suffix=u""):
    """
    Try to guess the basename for a particular file name created by
    iterCompatibleFilename() as far as it can be reconstructed.

    filename -- file name, may contain a path which is stripped first
    suffix -- suffix (including the dot) to remove from the end if present
    """
    # Filename may contain a path, so at first, strip it
    filename = os.path.basename(filename)

    # BUG FIX: with the default empty suffix, filename.endswith(u"") is
    # always True and filename[:-0] would yield the empty string, so the
    # function previously always returned u"". Guard against empty suffix.
    if suffix and filename.endswith(suffix):
        filename = filename[:-len(suffix)]
    # else?

    # After a tilde begins the random part, so remove
    tildI = filename.find(u"~")
    if tildI > 0: # tildI == 0 would mean a nameless file
        filename = filename[:tildI]

    # Decode the "%XX"/"@XXXX" escapes back to unicode characters
    return _FILENAME_UNQUOTE_RE.sub(_unquoteCharRepl, filename)
-
-
-
-
# Digits and uppercase letters: alphabet for random name parts and for
# the base-36 representation in getMd5B36ByString()
_RNDBASESEQ = u"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

def createRandomString(length):
    """
    Create a unicode string of length random characters and digits
    """
    chars = []
    for _unused in range(length):
        chars.append(random.choice(_RNDBASESEQ))
    return u"".join(chars)
-
-
-
- # _RNDBASENOHEX = u"GHIJKLMNOPQRSTUVWXYZ"
- #
- # def createRandomStringNoHexFirst(length):
- # """
- # Create a unicode string of length random characters and digits.
- # First char. must not be a possible hexadecimal digit.
- # """
- # if length == 0:
- # return u""
- #
- # return random.choice(_RNDBASENOHEX) + u"".join([random.choice(_RNDBASESEQ)
- # for i in range(length - 1)])
-
-
def getMd5B36ByString(text):
    """
    Calculate the MD5 hash of text (if unicode after conversion to utf-8)
    and return it as unistring for numeric base 36.

    Based on http://code.activestate.com/recipes/111286/
    """
    if isinstance(text, unicode):
        text = text.encode("utf-8")

    # Interpret the hex digest as one large integer
    x = int(hashlib.md5(text).hexdigest(), 16)

    base = len(_RNDBASESEQ)   # 36

    if x == 0:
        return _RNDBASESEQ[0]

    # Collect base-36 digits, least significant first, then reverse
    digits = []
    while x > 0:
        digits.append(_RNDBASESEQ[x % base])
        x //= base
    digits.reverse()
    return "".join(digits)
-
-
-
-
def boolToChar(b):
    """Encode truth value b as one character: "1" if true, NUL if false."""
    return "1" if b else "\0"
-
def charToBool(c):
    """Decode a character written by boolToChar() back to a bool."""
    if c == "\0":
        return False
    return True
-
def boolToInt(b):
    """Encode truth value b as integer 1 or 0."""
    return 1 if b else 0
-
-
def strToBin(s):
    """
    s -- String to convert to binary (NOT unicode!)

    Returns a 4-byte big-endian length header followed by s itself.
    """
    header = pack(">I", len(s))  # Why big-endian? Why not?
    return header + s
-
def binToStr(b):
    """
    Returns tuple (s, br) with string s and rest of the binary data br.
    Inverse of strToBin() applied to the head of b.
    """
    # First 4 bytes: big-endian payload length
    length = unpack(">I", b[:4])[0]
    payload = b[4:4 + length]
    rest = b[4 + length:]
    return (payload, rest)
-
-
- # def orderBySuggestion(strs, sugg):
- # """
- # Order string iterable strs in a way that all strings also present in
- # sequence sugg come first in resulting list, then the strings from strs
- # which are not in sugg in arbitrary order.
- # """
- # s = set(strs)
- # result = []
- # for e in sugg:
- # if e in s:
- # result.append(e)
- # s.remove(e)
- #
- # for e in s:
- # result.append(e)
- #
- # return result
-
-
def wikiUrlToPathWordAndAnchor(url):
    """
    Split a "wiki:" protocol URL into the path of the config file,
    the name of the wikiword and the anchor to open if given in query string.

    Returns (path, wikiword, anchor) where wikiword and/or anchor may be None
    """
    # Change "wiki:" url to "http:" for urlparse
    linkHt = "http:" + url[5:]
    parsed = urlparse.urlparse(linkHt)
    # Parse query string into dictionary
    queryDict = cgi.parse_qs(parsed[4])
    # Retrieve wikiword to open if existing
    # queryDict values are lists of values therefore this expression
    # NOTE(review): passes None through when key missing — presumably
    # flexibleUrlUnquote is None-safe; confirm
    wikiWordToOpen = flexibleUrlUnquote(queryDict.get("page", (None,))[0])
    anchorToOpen = flexibleUrlUnquote(queryDict.get("anchor", (None,))[0])

    # Modify parsed to create clean url by clearing query and fragment
    parsed = list(parsed)
    parsed[4] = ""
    parsed[5] = ""
    parsed = tuple(parsed)

    # [5:] strips the "http:" prefix added above before pathname conversion
    filePath = pathnameFromUrl(urlparse.urlunparse(parsed)[5:], False)

    # filePath = urllib.url2pathname(url)

    return (filePath, wikiWordToOpen, anchorToOpen)
-
-
def pathWordAndAnchorToWikiUrl(filePath, wikiWordToOpen, anchorToOpen):
    """
    Inverse of wikiUrlToPathWordAndAnchor(): build a "wiki:" URL from
    the config file path and optional wikiword and anchor (each may be
    None to omit it from the query string).
    """
    params = []
    if wikiWordToOpen is not None:
        params.append("page=" + urlQuote(wikiWordToOpen, safe=""))
    if anchorToOpen is not None:
        params.append("anchor=" + urlQuote(anchorToOpen, safe=""))

    url = "wiki:" + urlFromPathname(filePath)
    if params:
        url += "?" + "&".join(params)

    return url
-
-
def joinRegexes(patternList):
    """
    Combine the regex pattern strings in patternList into one pattern
    matching any of them, each wrapped in a non-capturing group.
    """
    inner = u")|(?:".join(patternList)
    return u"(?:(?:" + inner + u"))"
-
-
-
class SnippetCollector(object):
    """
    Collects (byte/uni)string snippets in a list. This is faster than
    using string += string.
    """
    def __init__(self):
        self.snippets = []   # collected pieces, in append order
        self.length = 0      # total length of all pieces

    def drop(self, length):
        """
        Remove last length (byte/uni)characters
        """
        assert self.length >= length

        # Walk backwards: discard whole snippets until the remainder to
        # drop is smaller than the last snippet, then trim that one.
        # (The original re-tested the same condition in a redundant
        # second `if`; it was always true when reached.)
        while length > 0 and len(self.snippets) > 0:
            last = self.snippets[-1]
            if length < len(last):
                # Only part of the last snippet is dropped
                self.snippets[-1] = last[:-length]
                self.length -= length
                break

            # The whole last snippet is dropped; continue with the rest
            length -= len(last)
            self.length -= len(last)
            del self.snippets[-1]

    def append(self, s):
        """Append snippet s; empty snippets are ignored."""
        if len(s) == 0:
            return

        self.length += len(s)
        self.snippets.append(s)

    def __iadd__(self, s):
        self.append(s)
        return self

    def value(self):
        """Return the concatenation of all collected snippets."""
        return "".join(self.snippets)

    def __len__(self):
        return self.length
-
-
class Conjunction:
    """
    Used to create SQL statements. Example:
    conjunction = Conjunction("where ", "and ")
    whereClause = ""
    if ...:
        whereClause += conjunction() + "word = ? "
    if ...:
        whereClause += conjunction() + "key = ? "

    will always create a valid where-clause
    """
    def __init__(self, firstpart, otherpart):
        # firstpart is returned by the first call only, otherpart by
        # all subsequent calls
        self.firstpart = firstpart
        self.otherpart = otherpart
        self.first = True

    def __call__(self):
        if not self.first:
            return self.otherpart

        self.first = False
        return self.firstpart

    def __repr__(self):
        return "<Conjunction(%s, %s) %s>" % (self.firstpart, self.otherpart,
                self.first)
-
-
-
- # ---------- Handling diff information ----------
-
-
- def difflibToCompact(ops, b):
- """
- Rewrite sequence of op_codes returned by difflib.SequenceMatcher.get_opcodes
- to the compact opcode format.
-
- 0: replace, 1: delete, 2: insert
-
- b -- second string to match
- """
- result = []
- # ops.reverse()
- for tag, i1, i2, j1, j2 in ops:
- if tag == "equal":
- continue
- elif tag == "replace":
- result.append((0, i1, i2, b[j1:j2]))
- elif tag == "delete":
- result.append((1, i1, i2))
- elif tag == "insert":
- result.append((2, i1, b[j1:j2]))
-
- retur…
Large files files are truncated, but you can click here to view the full file