
/lib/pwiki/StringOps.py

https://bitbucket.org/xkjq/wikidpad_svn
Python | 2087 lines
Possible License(s): LGPL-2.1


## -*- coding: ISO-8859-1 -*-
"""
Various string operations, like unicode encoding/decoding,
creating diff information for plain byte sequences
"""

import os, traceback
from struct import pack, unpack
import difflib, codecs, os.path, random, base64, locale, hashlib, tempfile, math

# import urllib_red as urllib
import urllib, urlparse, cgi

from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE

import wx

import re as _re    # import pwiki.srePersistent as _re

from WikiExceptions import *
from Utilities import between

LINEEND_SPLIT_RE = _re.compile(r"\r\n?|\n", _re.UNICODE)

from SystemInfo import isUnicode, isOSX, isLinux, isWindows, isWin9x

# To generate dependencies for py2exe/py2app
import encodings.utf_8, encodings.latin_1, encodings.utf_16, \
        encodings.utf_16_be, encodings.utf_16_le, encodings.ascii


# ---------- Encoding conversion ----------

utf8Enc = codecs.getencoder("utf-8")
utf8Dec = codecs.getdecoder("utf-8")
utf8Reader = codecs.getreader("utf-8")
utf8Writer = codecs.getwriter("utf-8")


def convertLineEndings(text, newLe):
    """
    Convert line endings of text to string newLe which should be
    "\n", "\r" or "\r\n". If newLe or text is unicode, the result
    will be unicode, too.
    """
    return newLe.join(LINEEND_SPLIT_RE.split(text))


def lineendToInternal(text):
    return convertLineEndings(text, "\n")
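
# --- Illustrative usage sketch (added; not from the original file) ---
# Rough examples of the line-ending helpers above; the values follow from
# LINEEND_SPLIT_RE splitting on "\r\n", "\r" or "\n".
# >>> convertLineEndings(u"a\r\nb\rc\nd", u"\n")
# u'a\nb\nc\nd'
# >>> lineendToInternal(u"x\r\ny")
# u'x\ny'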

if isOSX():
    # generate dependencies for py2app
    import encodings.mac_roman
    _mbcsEnc = codecs.getencoder("mac_roman")
    _mbcsDec = codecs.getdecoder("mac_roman")
    mbcsReader = codecs.getreader("mac_roman")
    mbcsWriter = codecs.getwriter("mac_roman")

    def lineendToOs(text):
        return convertLineEndings(text, "\r")

elif isLinux():
    # Could be wrong encoding
    # LINUX_ENCODING = "latin-1"
    # LINUX_ENCODING = "utf8"
    LINUX_ENCODING = locale.getpreferredencoding()
    if not LINUX_ENCODING:
        LINUX_ENCODING = "utf8"

    _mbcsEnc = codecs.getencoder(LINUX_ENCODING)
    _mbcsDec = codecs.getdecoder(LINUX_ENCODING)
    mbcsReader = codecs.getreader(LINUX_ENCODING)
    mbcsWriter = codecs.getwriter(LINUX_ENCODING)

    def lineendToOs(text):
        return convertLineEndings(text, "\n")

else:
    # generate dependencies for py2exe
    import encodings.ascii
    import encodings.mbcs
    _mbcsEnc = codecs.getencoder("mbcs")
    _mbcsDec = codecs.getdecoder("mbcs")
    mbcsReader = codecs.getreader("mbcs")
    mbcsWriter = codecs.getwriter("mbcs")

    def lineendToOs(text):
        return convertLineEndings(text, "\r\n")


def mbcsEnc(input, errors="strict"):
    if isinstance(input, str):
        return input, len(input)
    else:
        return _mbcsEnc(input, errors)


def mbcsDec(input, errors="strict"):
    if isinstance(input, unicode):
        return input, len(input)
    else:
        return _mbcsDec(input, errors)
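
# --- Illustrative usage sketch (added; not from the original file) ---
# mbcsEnc()/mbcsDec() pass already-converted input through unchanged and
# otherwise use the platform codec chosen above. Assuming an ASCII-compatible
# locale codec, the round trip looks like this:
# >>> mbcsEnc(u"abc")
# ('abc', 3)
# >>> mbcsDec("abc")
# (u'abc', 3)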

if os.path.supports_unicode_filenames:
    def dummy(s):
        return s

    pathEnc = dummy
    pathDec = dummy
else:
    def pathEnc(s):
        if s is None:
            return None
        return mbcsEnc(s, "replace")[0]

    def pathDec(s):
        if s is None:
            return None
        return mbcsDec(s, "replace")[0]


if isWindows():
    if not os.path.supports_unicode_filenames:
        raise InternalError("This Python version does not support unicode paths")

    # To process paths longer than 255 characters, Windows (NT and following)
    # expects an absolute path prefixed with \\?\
    def longPathEnc(s):
        if s is None:
            return None
        # if s.startswith("\\\\?\\"):
        if s.startswith("\\\\"):
            return s
        return u"\\\\?\\" + os.path.abspath(s)

    def longPathDec(s):
        if s is None:
            return None
        if s.startswith("\\\\?\\"):
            return s[4:]
        return s
else:
    longPathEnc = pathEnc
    longPathDec = pathDec


if isUnicode():
    def uniToGui(text):
        """
        Convert unicode text to a format usable for wx GUI
        """
        return text   # Nothing to do

    def guiToUni(text):
        """
        Convert wx GUI string format to unicode
        """
        return text   # Nothing to do
else:
    def uniToGui(text):
        """
        Convert unicode text to a format usable for wx GUI
        """
        return mbcsEnc(text, "replace")[0]

    def guiToUni(text):
        """
        Convert wx GUI string format to unicode
        """
        return mbcsDec(text, "replace")[0]

# TODO!
def unicodeToCompFilename(us):
    """
    Encode a unicode filename to a filename compatible with (hopefully)
    any filesystem encoding by converting unicode to '=xx' for
    characters up to 255 and '$xxxx' above. Each 'x' represents a hex
    digit.
    """
    result = []
    for c in us:
        if ord(c) > 255:
            result.append("$%04x" % ord(c))
            continue
        if c in u"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"+\
                u"{}()+-_,.%":   # Allowed characters
            result.append(str(c))
            continue
        result.append("=%02x" % ord(c))

    return "".join(result)

# def unicodeToAllCharFilename


def strWithNone(s):
    if s is None:
        return ""
    return s


def uniWithNone(u):
    if u is None:
        return u""
    return u


def strToBool(s, default=False):
    """
    Try to interpret string (or unicode) s as
    boolean, return default if string can't be
    interpreted
    """
    if s is None:
        return default

    # Try to interpret as integer
    try:
        return int(s) != 0
    except ValueError:
        # Not an integer
        s = s.lower()
        if s in (u"true", u"yes", u"on"):
            return True
        if s in (u"false", u"no", u"off"):
            return False

        return default
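
# --- Illustrative usage sketch (added; not from the original file) ---
# strToBool() first tries the integer route, then the keyword lists above:
# >>> strToBool(u"Yes"), strToBool(u"0"), strToBool(u"maybe", default=True)
# (True, False, True)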

# TODO More formats
def fileContentToUnicode(content):
    """
    Try to detect the text encoding of content
    and return converted unicode
    """
    if content.startswith(BOM_UTF8):
        return content[len(BOM_UTF8):].decode("utf-8", "replace")
    elif content.startswith(BOM_UTF16_BE):
        return content[len(BOM_UTF16_BE):].decode("utf-16-be", "replace")
    elif content.startswith(BOM_UTF16_LE):
        return content[len(BOM_UTF16_LE):].decode("utf-16-le", "replace")
    else:
        return mbcsDec(content, "replace")[0]


def contentToUnicode(content):
    """
    Try to detect the text encoding of content
    and return converted unicode
    """
    if isinstance(content, unicode):
        return content

    if content.startswith(BOM_UTF8):
        return content[len(BOM_UTF8):].decode("utf-8", "replace")
    elif content.startswith(BOM_UTF16_BE):
        return content[len(BOM_UTF16_BE):].decode("utf-16-be", "replace")
    elif content.startswith(BOM_UTF16_LE):
        return content[len(BOM_UTF16_LE):].decode("utf-16-le", "replace")
    else:
        try:
            return content.decode("utf-8", "strict")
        except UnicodeDecodeError:
            return mbcsDec(content, "replace")[0]


def loadEntireTxtFile(filename):
    """
    Load entire file (text mode) and return its content.
    """
    rf = open(pathEnc(filename), "rU")
    try:
        result = rf.read()
        return result
    finally:
        rf.close()


# def writeEntireTxtFile(filename, content):
#     """
#     Write entire file (text mode).
#     content can either be a byte string or a tuple or list of byte strings
#     which are then written one by one to the file.
#     """
#     rf = open(pathEnc(filename), "w")
#     try:
#         if isinstance(content, tuple) or isinstance(content, list):
#             for c in content:
#                 rf.write(c)
#         else:
#             rf.write(content)
#         return
#     finally:
#         rf.close()


# def writeEntireFileFast(filename, content, textMode=False):
#     """
#     Fast write of bytestring content without temporary file and
#     error checking.
#     """
#     if textMode:
#         rf = open(pathEnc(filename), "w")
#     else:
#         rf = open(pathEnc(filename), "wb")
#
#     try:
#         rf.write(content)
#     finally:
#         rf.close()


def loadEntireFile(filename, textMode=False):
    """
    Load entire file and return its content.
    """
    if textMode:
        rf = open(pathEnc(filename), "rU")
    else:
        rf = open(pathEnc(filename), "rb")

    try:
        return rf.read()
    finally:
        rf.close()


def writeEntireFile(filename, content, textMode=False):
    """
    Write entire file.
    content can either be a bytestring or a tuple or list of bytestrings
    which are then written one by one to the file.
    If textMode is True, content can also be a unistring or sequence
    of them (no mixed bytestring/unistring sequences allowed!)
    which are then converted to UTF-8 and written to the file with a prefixed
    UTF-8 BOM. In textMode, line endings are converted to the ones
    appropriate for the OS.
    """
    import TempFileSet

    basePath = os.path.split(filename)[0]
    suffix = os.path.splitext(filename)[1]
    if basePath == "":
        basePath = u"."

    tempPath = TempFileSet.createTempFile(content, suffix=suffix, path=basePath,
            textMode=textMode)

    if os.path.exists(filename):
        os.unlink(filename)

    os.rename(tempPath, filename)

def getFileSignatureBlock(filename, timeCoarsening=None):
    """
    Returns the file signature block for a given file. It is a bytestring
    containing size and modification date of the file and can be compared to a
    db-stored version to check for file changes outside of WikidPad.

    The timeCoarsening can be a number of seconds (or fractions thereof).
    The modification time is rounded UP to a number divisible by timeCoarsening.
    Without coarsening, a wiki moved between file systems with different time
    granularity (e.g. NTFS uses 100 ns, FAT uses 2 s for mod. time) would be
    seen as dirty and the cache data would be rebuilt needlessly.
    """
    statinfo = os.stat(pathEnc(filename))
    if timeCoarsening is None or timeCoarsening <= 0:
        return pack(">BQd", 0, statinfo.st_size, statinfo.st_mtime)

    ct = int(math.ceil(statinfo.st_mtime / timeCoarsening)) * timeCoarsening

    return pack(">BQd", 0, statinfo.st_size, ct)

def removeBracketsFilename(fn):
    """
    Remove brackets (real brackets, not configurable) from a filename
    """
    n, ext = os.path.splitext(fn)
    if n.startswith(u"[") and n.endswith(u"]"):
        n = n[1:-1]

    return n + ext


def revStr(s):
    """
    Return reversed string
    """
    s = list(s)
    s.reverse()
    return u"".join(s)


def splitKeep(s, delim):
    """
    Similar to split, but keeps the delimiter as separate element, e.g.
    splitKeep("aaabaaabaa", "b") -> ["aaa", "b", "aaa", "b", "aa"]
    """
    result = []
    for e in s.split(delim):
        result.append(e)
        result.append(delim)

    return result[:-1]


def splitIndentDeepness(text):
    """
    Return tuple (d, t) where d is deepness of indentation and t is text
    without the indentation.
    """
    pl = len(text)
    text = text.lstrip()
    return (pl - len(text), text)


def splitIndent(text):
    """
    Return tuple (ind, t) where ind is a string of the indentation characters
    (normally spaces) and t is text without the indentation.
    """
    pl = len(text)
    textOnly = text.lstrip()
    return (text[:pl - len(textOnly)], textOnly)
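
# --- Illustrative usage sketch (added; not from the original file) ---
# The two indentation helpers report the same split in different forms:
# >>> splitIndentDeepness(u"    foo")
# (4, u'foo')
# >>> splitIndent(u"    foo")
# (u'    ', u'foo')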

def measureIndent(indent):
    return len(indent)


def findLineStart(text, pos):
    # This is even right if no newline is found
    return text.rfind(u"\n", 0, pos) + 1


def findLineEnd(text, pos):
    result = text.find(u"\n", pos)
    if result == -1:
        return len(text)
    else:
        return result


LASTWORDSTART_RE = _re.compile(r"(?:.*\W)?()\w", _re.UNICODE)
FIRSTWORDEND_RE = _re.compile(r".*?()(?:\W|(?!.))", _re.UNICODE)


def getNearestWordStart(text, pos):
    lsPos = findLineStart(text, pos)
    match = LASTWORDSTART_RE.match(text, lsPos, pos + 1)
    if match is not None:
        return match.start(1)
    else:
        return pos


def getNearestWordEnd(text, pos):
    match = FIRSTWORDEND_RE.match(text, pos)
    if match is not None:
        return match.start(1)
    else:
        return pos


def styleSelection(text, start, afterEnd, startChars, endChars=None):
    """
    Called when selected text (between start and afterEnd)
    e.g. in editor should be styled with startChars and endChars

    text -- Whole text
    start -- Start position of selection
    afterEnd -- After end position of selection
    startChars -- Characters to place before selection
    endChars -- Characters to place after selection. If None, startChars
            is used for that, too

    Returns tuple (replacement, repStart, repAfterEnd, selStart, selAfterEnd) where
        replacement -- replacement text
        repStart -- Start of characters to delete in original text
        repAfterEnd -- After end of characters to delete
        selStart -- Recommended start of editor selection after replacement
            was done
        selAfterEnd -- Recommended after end of editor selection after replacement
    """
    if endChars is None:
        endChars = startChars

    if start == afterEnd:
        start = getNearestWordStart(text, start)
        afterEnd = getNearestWordEnd(text, start)

    emptySelection = start == afterEnd   # is selection empty

    replacement = startChars + text[start:afterEnd] + endChars

    if emptySelection:
        # If selection is empty, cursor should in the end
        # stand between the style characters
        cursorPos = afterEnd + len(startChars)
    else:
        # If not, it will stand after styled word
        cursorPos = afterEnd + len(startChars) + len(endChars)

    return (replacement, start, afterEnd, cursorPos, cursorPos)

def splitFill(text, delim, count, fill=u""):
    """
    Split text by delim into at most count+1 pieces. If fewer than count+1
    pieces result, additional pieces containing fill are appended.
    """
    result = text.split(delim, count)
    if len(result) < count + 1:
        result += [fill] * (count + 1 - len(result))

    return result
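
# --- Illustrative usage sketch (added; not from the original file) ---
# The result always has count+1 entries:
# >>> splitFill(u"a;b", u";", 3)
# [u'a', u'b', u'', u'']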

# def splitUnifName(unifName):
#     """
#     Split a unified name path and return a list of components.
#     If a part of the path must contain a slash it is quoted as double slash.
#
#     Some unified names shouldn't be processed by this function, especially
#     "wikipage/..." unifNames
#     """
#     result =


def matchWhole(reObj, s):
    """
    reObj -- Compiled regular expression
    s -- String to match

    Similar to reObj.match(s), but returns MatchObject only if the
    whole string s is covered by the match, returns None otherwise
    """
    mat = reObj.match(s)
    if not mat:
        return None
    if mat.end(0) < len(s):
        return None

    return mat
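
# --- Illustrative usage sketch (added; not from the original file) ---
# Only a match covering the complete string is returned:
# >>> pat = _re.compile(r"\d+")
# >>> matchWhole(pat, u"123") is not None, matchWhole(pat, u"123a") is None
# (True, True)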

def obfuscateShortcut(shortcut):
    """
    Necessary to prevent wxPython from interpreting e.g. CTRL+LEFT in a menu
    item as being a shortcut. I haven't found a better way.

    Unused at the moment.
    """
    return u"".join([u"\u200B" + c for c in shortcut])


## Copied from xml.sax.saxutils and modified to reduce dependencies
def escapeHtml(data):
    """
    Escape &, <, > and line breaks in a unicode string of data.
    """
    # must do ampersand first
    return data.replace(u"&", u"&amp;").replace(u">", u"&gt;").\
            replace(u"<", u"&lt;").replace(u"\n", u"<br />\n")


def escapeHtmlNoBreaks(data):
    """
    Escape &, <, and > (no line breaks) in a unicode string of data.
    """
    # must do ampersand first
    return data.replace(u"&", u"&amp;").replace(u">", u"&gt;").\
            replace(u"<", u"&lt;")


class AbstractHtmlItem:
    """
    Abstract base for some "things" appearing in HTML. This and derived classes
    are mainly needed for the "htmlEquivalent" token in a wiki AST
    """
    def __init__(self):
        pass

    def asString(self):
        raise NotImplementedError

    def clone(self):
        raise NotImplementedError

    def __repr__(self):
        return self.__class__.__name__ + ":" + self.asString()


class HtmlStartTag(AbstractHtmlItem):
    """
    Regular start tag
    """
    def __init__(self, tag, attributes=None):
        self.tag = tag
        if attributes is None:
            self.attributes = {}
        else:
            self.attributes = dict((k, escapeHtml(v).replace(u"\"", u"&quot;"))
                    for k, v in attributes.iteritems())

    def addAttribute(self, key, value):
        if value is None:
            value = key
        self.attributes[key] = escapeHtml(value).replace(u"\"", u"&quot;")

    def addEscapedAttribute(self, key, value):
        if value is None:
            value = key
        self.attributes[key] = value

    def addEscapedAttributes(self, attrSeq):
        for key, value in attrSeq:
            self.addEscapedAttribute(key, value)

    def getTag(self):
        return self.tag

    def getStringForAttributes(self):
        return u" ".join(
                k + u"=\"" + v + u"\""
                for k, v in self.attributes.iteritems())

    def asString(self):
        if len(self.attributes) == 0:
            return u"<" + self.tag + u">"

        attrString = self.getStringForAttributes()
        return u"<" + self.tag + u" " + attrString + u">"

    def clone(self):
        return HtmlStartTag(self.tag, self.attributes)


class HtmlEmptyTag(HtmlStartTag):
    """
    Start tag which is also end tag
    """
    def asString(self):
        if len(self.attributes) == 0:
            return u"<" + self.tag + u" />"

        attrString = self.getStringForAttributes()
        return u"<" + self.tag + u" " + attrString + u" />"

    def clone(self):
        return HtmlEmptyTag(self.tag, self.attributes)


class HtmlEndTag(AbstractHtmlItem):
    """
    Regular end tag
    """
    def __init__(self, tag):
        self.tag = tag

    def asString(self):
        return u"</" + self.tag + u">"

    def clone(self):
        return HtmlEndTag(self.tag)


class HtmlEntity(AbstractHtmlItem):
    """
    Entity
    """
    def __init__(self, entity):
        if entity[0] != "&":
            entity = "&" + entity
        if entity[-1] != ";":
            entity += ";"
        self.entity = entity

    def asString(self):
        return self.entity

    def clone(self):
        return HtmlEntity(self.entity)


def escapeForIni(text, toEscape=u""):
    """
    Return an escaped version of string. Always escaped will be backslash and
    all characters with ASCII value < 32. Additional characters can be given in
    the toEscape parameter (as unicode string, only characters < 128,
    not the backslash).

    Returns: unicode string
    """
    # Escape '\'
    text = text.replace(u"\\", u"\\x%02x" % ord("\\"))

    # Escape everything with ord < 32
    for i in xrange(32):
        text = text.replace(unichr(i), u"\\x%02x" % i)

    for c in toEscape:
        text = text.replace(c, u"\\x%02x" % ord(c))

    return text


def _unescapeForIniHelper(match):
    return unichr(int(match.group(1), 16))


def unescapeForIni(text):
    """
    Inverse of escapeForIni()
    """
    return _re.sub(ur"\\x([0-9a-f]{2})", _unescapeForIniHelper, text)
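
# --- Illustrative usage sketch (added; not from the original file) ---
# escapeForIni()/unescapeForIni() form a round trip; ";" (0x3b) is escaped
# here because it is passed in toEscape, "\n" (0x0a) always is:
# >>> escapeForIni(u"a\nb;c", u";")
# u'a\\x0ab\\x3bc'
# >>> unescapeForIni(u'a\\x0ab\\x3bc')
# u'a\nb;c'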

# def escapeWithRe(text):
#     return text.replace(u"\\", u"\\\\").replace("\n", "\\n").\
#             replace("\r", "\\r")


def unescapeWithRe(text):
    """
    Unescape things like \n or \f. Throws exception if unescaping fails
    """
    # text is used as the *replacement* template of re.sub on an empty string,
    # so the regex engine expands backslash escapes like \n or \f inside it.
    return _re.sub(u"", text, u"", 1)

def re_sub_escape(pattern):
    """
    Escape the replacement pattern for a re.sub function
    """
    return pattern.replace(u"\\", u"\\\\").replace(u"\n", u"\\n").replace(
            u"\r", u"\\r").replace(u"\t", u"\\t").replace(u"\f", u"\\f")


HTML_DIGITCOLOR = _re.compile(
        ur"^#[0-9a-fA-F]{3}(?:[0-9a-fA-F]{3})?$",
        _re.DOTALL | _re.UNICODE | _re.MULTILINE)

# def htmlColorToRgbTuple(desc):

def colorDescToRgbTuple(desc):
    """
    Converts a color description to an RGB tuple or None if
    description is invalid.
    Color description can be:
        HTML 6-digits color, e.g. #C0D623
        HTML 3-digits color, e.g. #4E2 which converts to #44EE22 (TODO: HTML standard?)
        HTML color name
    """
    global HTML_DIGITCOLOR, _COLORBASE

    if not HTML_DIGITCOLOR.match(desc):
        try:
            desc = _COLORBASE[desc.replace(" ", "").lower()]
        except KeyError:
            return None

    if len(desc) == 4:
        desc = "#" + desc[1] + desc[1] + desc[2] + desc[2] + desc[3] + desc[3]

    try:
        r = int(desc[1:3], 16)
        g = int(desc[3:5], 16)
        b = int(desc[5:7], 16)
        return (r, g, b)
    except:
        return None
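
# --- Illustrative usage sketch (added; not from the original file) ---
# Hex forms are handled directly; color names are looked up in the _COLORBASE
# table referenced via the global statement above (not shown in this
# truncated listing):
# >>> colorDescToRgbTuple(u"#C0D623")
# (192, 214, 35)
# >>> colorDescToRgbTuple(u"#4E2")
# (68, 238, 34)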

# def colorDescToRgbTuple(desc):
#     """
#     Converts a color description to an RGB tuple or None if
#     description is invalid.
#     Color description can be:
#     HTML 6-digits color, e.g. #C0D623
#     HTML 3-digits color, e.g. #4E2 which converts to #44EE22 (TODO: HTML standard?)
#     HTML color name
#     """
#     desc = desc.strip()
#     if len(desc) == 0:
#         return None
#
#     if desc[0] != "#":
#         desc = desc.replace(" ", "").lower()
#         desc = _COLORBASE.get(desc)
#         if desc is None:
#             return None
#
#     if len(desc) == 4:
#         desc = "#" + desc[1] + desc[1] + desc[2] + desc[2] + desc[3] + desc[3]
#
#     if len(desc) != 7:
#         return None
#     try:
#         r = int(desc[1:3], 16)
#         g = int(desc[3:5], 16)
#         b = int(desc[5:7], 16)
#         return (r, g, b)
#     except:
#         return None


def rgbToHtmlColor(r, g, b):
    """
    Return HTML color '#hhhhhh' format string.
    """
    return "#%02X%02X%02X" % (r, g, b)


def base64BlockEncode(data):
    """
    Cut a sequence of base64 characters into chunks of 70 characters
    and join them with newlines. Python's base64 decoder can read this.
    """
    b64 = base64.b64encode(data)
    result = []
    while len(b64) > 70:
        result.append(b64[:70])
        b64 = b64[70:]

    if len(b64) > 0:
        result.append(b64)

    return u"\n".join(result)


# Just for completeness
base64BlockDecode = base64.b64decode


EXTENDED_STRFTIME_RE = _re.compile(
        r"([^%]+|%(?:%|[%aAbBcdHIJmMpSUwWxXyYZ])|(?:%u))",
        _re.DOTALL | _re.UNICODE | _re.MULTILINE)


def formatWxDate(frmStr, date):
    """
    Format a date (wxDateTime) according to frmStr similar to strftime.
    """
    if frmStr == "":
        return frmStr

    resParts = []
    for part in EXTENDED_STRFTIME_RE.split(frmStr):
        if not part:
            continue

        if part == "%u":
            # Create weekday following ISO-8601
            wd = date.GetWeekDay()
            if wd == 0:
                # Sunday has number 7
                wd = 7
            resParts.append("%i" % wd)
        else:
            resParts.append(part)

    frmStr = "".join(resParts)

    return date.Format(unescapeWithRe(frmStr))


def strftimeUB(frmStr, timet=None):
    """
    Similar to time.strftime, but uses a time_t number as time (no structure),
    also unescapes some backslash codes, supports unicode and shows local time
    if timet is GMT.
    """
    if timet is None:
        return formatWxDate(frmStr, wx.DateTime_Now())

    try:
        return formatWxDate(frmStr, wx.DateTimeFromTimeT(timet))
    except TypeError:
        return _(u"Inval. timestamp")   # TODO Better errorhandling?


def splitpath(path):
    """
    Cut a path into all of its pieces, starting with drive name, through
    all path components up to the name of the file (if any).
    Returns a list of the elements, first and/or last element may be
    empty strings.
    Maybe use os.path.abspath before calling it
    """
    dr, path = os.path.splitdrive(path)
    result = []
    while True:
        head, last = os.path.split(path)
        if head == path:
            break
        result.append(last)
        path = head
    result.append(dr)
    result.reverse()
    return result
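
# --- Illustrative usage sketch (added; not from the original file) ---
# The result is platform dependent; on a POSIX system:
# >>> splitpath("/usr/local/bin/python")
# ['', 'usr', 'local', 'bin', 'python']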

def getRelativeFilePathAndTestContained(location, toFilePath):
    """
    Returns a relative (if possible) path to address the file
    toFilePath if you are in directory location as first tuple item.
    Function returns None as first tuple item if an absolute path is needed!

    Tests if toFilePath is a file or dir contained in location and returns
    the truth value as second tuple item.

    Both parameters should be normalized with os.path.abspath

    location -- Directory where you are
    toFilePath -- absolute path to file you want to reach
    """
    locParts = splitpath(location)
    if locParts[-1] == "":
        del locParts[-1]

    locLen = len(locParts)
    fileParts = splitpath(toFilePath)

    for i in xrange(len(locParts)):
        if len(fileParts) == 0:
            break   # TODO Error ???

        if os.path.normcase(locParts[0]) != os.path.normcase(fileParts[0]):
            break

        del locParts[0]
        del fileParts[0]

    result = []

    if len(locParts) == locLen:
        # Nothing matches at all, absolute path needed
        return None, False

    isContained = len(fileParts) > 0

    if len(locParts) > 0:
        # go back some steps
        result += [".."] * len(locParts)
        isContained = False

    result += fileParts

    if len(result) == 0:
        return u"", False
    else:
        return os.path.join(*result), isContained


def relativeFilePath(location, toFilePath):
    """
    Returns a relative (if possible) path to address the file
    toFilePath if you are in directory location.
    Both parameters should be normalized with os.path.abspath

    Function returns None if an absolute path is needed!

    location -- Directory where you are
    toFilePath -- absolute path to file you want to reach
    """
    return getRelativeFilePathAndTestContained(location, toFilePath)[0]


def testContainedInDir(location, toFilePath):
    """
    Tests if toFilePath is a file or dir contained in location.
    Both parameters should be normalized with os.path.abspath
    """
    return getRelativeFilePathAndTestContained(location, toFilePath)[1]
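
# --- Illustrative usage sketch (added; not from the original file) ---
# Assuming POSIX-style absolute paths:
# >>> getRelativeFilePathAndTestContained("/home/u/wiki", "/home/u/wiki/data/img.png")
# ('data/img.png', True)
# >>> getRelativeFilePathAndTestContained("/home/u/wiki", "/home/u/other/doc.txt")
# ('../other/doc.txt', False)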

def _asciiFlexibleUrlUnquote(part):
    """
    Unquote ascii-only parts of an url
    """
    if len(part) == 0:
        return u""

    # Get bytes out of percent-quoted URL
    linkBytes = urllib.unquote(part)

    # Try to interpret bytes as UTF-8
    try:
        return linkBytes.decode("utf8", "strict")
    except UnicodeDecodeError:
        # Failed -> try mbcs
        try:
            return mbcsDec(linkBytes, "strict")[0]
        except UnicodeDecodeError:
            # Failed, too -> leave link part unmodified. TODO: Doesn't make sense, will fail as well.
            return unicode(part)


def flexibleUrlUnquote(link):
    """
    Tries to unquote an url.
    TODO: Faster and more elegantly.

    link -- unistring
    """
    if link is None:
        return None

    i = 0
    result = SnippetCollector()

    while i < len(link):
        asciiPart = ""
        while i < len(link) and ord(link[i]) < 128:
            asciiPart += chr(ord(link[i]))
            i += 1

        result += _asciiFlexibleUrlUnquote(asciiPart)

        unicodePart = u""
        while i < len(link) and ord(link[i]) >= 128:
            unicodePart += link[i]
            i += 1

        result += unicodePart

    return unicode(result.value())


URL_RESERVED = frozenset((u";", u"?", u":", u"@", u"&", u"=", u"+", u",", u"/",
        u"{", u"}", u"|", u"\\", u"^", u"~", u"[", u"]", u"`", u'"', u"%"))


def urlQuote(s, safe='/'):
    """
    Modified version of urllib.quote supporting unicode.

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

        reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                   "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    The function is intended for quoting the path
    section of a URL. Thus, it will not encode '/'. This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    The characters u"{", u"}", u"|", u"\\", u"^", u"~", u"[", u"]", u"`"
    are considered unsafe and should be quoted as well.
    """
    result = []
    for c in s:
        if c not in safe and (ord(c) < 33 or c in URL_RESERVED):
            result.append("%%%02X" % ord(c))
        else:
            result.append(c)

    return "".join(result)

def urlQuoteSpecific(s, toQuote=''):
    """
    Only quote characters in toQuote
    """
    result = []
    for c in s:
        if c in toQuote:
            result.append("%%%02X" % ord(c))
        else:
            result.append(c)

    return "".join(result)


def ntUrlFromPathname(p, addSafe=''):
    r"""
    Modified version of nturl2path.pathname2url.

    Convert a DOS/Windows path name to a file url.

        C:\foo\bar\spam.foo

    becomes

        ///C:/foo/bar/spam.foo
    """
    if not ':' in p:
        # No drive specifier, just convert slashes and quote the name
        # if p[:2] == '\\\\':
        #     # path is something like \\host\path\on\remote\host
        #     # convert this to ////host/path/on/remote/host
        #     # (notice doubling of slashes at the start of the path)
        #     p = '\\\\' + p
        components = p.split('\\')
        return urlQuote('/'.join(components), safe='/' + addSafe)

    comp = p.split(':')
    if len(comp) != 2 or len(comp[0]) > 1:
        error = 'Bad path: ' + p
        raise IOError, error

    drive = urlQuote(comp[0].upper(), safe='/' + addSafe)
    components = comp[1].split('\\')
    path = '///' + drive + ':'
    for comp in components:
        if comp:
            path = path + '/' + urlQuote(comp, safe='/' + addSafe)

    return path


def _macpncomp2url(component, addSafe):
    component = urlQuote(component[:31], safe=addSafe)   # We want to quote slashes
    return component


def macUrlFromPathname(pathname, addSafe=''):
    """
    Modified version of macurl2path.pathname2url.

    convert mac pathname to /-delimited pathname
    """
    if '/' in pathname:
        raise RuntimeError, "Cannot convert pathname containing slashes"

    components = pathname.split(':')

    # Remove empty first and/or last component
    if components[0] == '':
        del components[0]
    if components[-1] == '':
        del components[-1]

    # Replace empty string ('::') by .. (will result in '/../' later)
    for i in range(len(components)):
        if components[i] == '':
            components[i] = '..'

    # Truncate names longer than 31 bytes
    components = [_macpncomp2url(c, addSafe) for c in components]
    # components = map(_macpncomp2url, components)

    if os.path.isabs(pathname):
        return '/' + '/'.join(components)
    else:
        return '/'.join(components)


if os.name == 'nt':
    urlFromPathname = ntUrlFromPathname
elif os.name == 'mac':
    urlFromPathname = macUrlFromPathname
else:
    def urlFromPathname(fn, addSafe=''):
        if isinstance(fn, unicode):
            fn = utf8Enc(fn, "replace")[0]

        # riscos not supported
        url = urlQuote(fn, safe='/$' + addSafe)
        # url.replace("%24", "$")

        return url


def ntPathnameFromUrl(url, testFileType=True):
    r"""
    Modified version of nturl2path.url2pathname.

    Convert a URL to a DOS path.

        ///C|/foo/bar/spam.foo

    becomes

        C:\foo\bar\spam.foo

    testFileType -- ensure that URL has type "file" (and starts with "file:"),
            throw RuntimeError if not.
    """
    import string

    if url.startswith("file:") or url.startswith("wiki:"):
        url = url[5:]
    elif testFileType:
        raise RuntimeError, 'Cannot convert non-local URL to pathname'

    # Strip fragment or query if present
    url, dummy = decomposeUrlQsFrag(url)

    if (not ':' in url) and (not '|' in url) and (not '%3A' in url) and (not '%3a' in url):
        # No drive specifier, just convert slashes
        if url[:4] == '////':
            # path is something like ////host/path/on/remote/host
            # convert this to \\host\path\on\remote\host
            # (notice halving of slashes at the start of the path)
            url = url[2:]
        components = url.split('/')
        # make sure not to convert quoted slashes :-)
        return flexibleUrlUnquote('\\'.join(components))

    comp = None
    for driveDelim in ('|', ':', '%3A', '%3a'):
        comp = url.split(driveDelim)
        if len(comp) != 2 or len(comp[0]) == 0 or comp[0][-1] not in string.ascii_letters:
            comp = None
            continue
        break

    if comp is None:
        error = 'Bad URL: ' + url
        raise IOError(error)

    # comp = url.split('|')
    # if len(comp) == 1:
    #     comp = url.split(':')
    #
    # if len(comp) != 2 or len(comp[0]) == 0 or comp[0][-1] not in string.ascii_letters:
    #     error = 'Bad URL: ' + url
    #     raise IOError, error

    drive = comp[0][-1].upper()
    components = comp[1].split('/')
    path = drive + ':'
    for comp in components:
        if comp:
            path = path + '\\' + flexibleUrlUnquote(comp)

    return path


def macPathnameFromUrl(url, testFileType=True):
    "Convert /-delimited url to mac pathname"
    #
    # XXXX The .. handling should be fixed...
    #
    tp = urllib.splittype(url)[0]
    if tp and tp != 'file' and tp != 'wiki':
        raise RuntimeError, 'Cannot convert non-local URL to pathname'

    # Turn starting /// into /, an empty hostname means current host
    if url[:3] == '///':
        url = url[2:]
    elif url[:2] == '//':
        raise RuntimeError, 'Cannot convert non-local URL to pathname'

    # Strip fragment or query if present
    url, dummy = decomposeUrlQsFrag(url)

    components = url.split('/')

    # Remove . and embedded ..
    i = 0
    while i < len(components):
        if components[i] == '.':
            del components[i]
        elif components[i] == '..' and i > 0 and \
                components[i-1] not in ('', '..'):
            del components[i-1:i+1]
            i = i-1
        elif components[i] == '' and i > 0 and components[i-1] != '':
            del components[i]
        else:
            i = i+1

    if not components[0]:
        # Absolute unix path, don't start with colon
        rv = ':'.join(components[1:])
    else:
        # relative unix path, start with colon. First replace
        # leading .. by empty strings (giving ::file)
        i = 0
        while i < len(components) and components[i] == '..':
            components[i] = ''
            i = i + 1
        rv = ':' + ':'.join(components)

    # and finally unquote slashes and other funny characters
    return flexibleUrlUnquote(rv)


def elsePathnameFromUrl(url, testFileType=True):
    "Convert /-delimited url to pathname"
    #
    # XXXX The .. handling should be fixed...
    #
    if url.startswith("file:///") or url.startswith("wiki:///"):
        url = url[7:]   # Third '/' remains
    elif url.startswith("file:") or url.startswith("wiki:"):
        url = url[5:]
    elif testFileType:
        raise RuntimeError, 'Cannot convert non-local URL to pathname'

    # Strip fragment or query if present
    url, dummy = decomposeUrlQsFrag(url)

    return flexibleUrlUnquote(url)


if os.name == 'nt':
    pathnameFromUrl = ntPathnameFromUrl
elif os.name == 'mac':
    pathnameFromUrl = macPathnameFromUrl
else:
    # pathnameFromUrl = flexibleUrlUnquote
    pathnameFromUrl = elsePathnameFromUrl


_DECOMPOSE_URL_RE = _re.compile(ur"([^?#]*)((?:[?#].*)?)", _re.UNICODE | _re.DOTALL)


def decomposeUrlQsFrag(url):
    """
    Find first '?' or '#' (query string or fragment) in URL and split URL
    there so that the parts can be (un-)quoted differently.
    Returns a 2-tuple with main part and additional part of URL.
    """
    return _DECOMPOSE_URL_RE.match(url).groups()


def composeUrlQsFrag(mainUrl, additional):
    """
    Compose main URL and additional part back into one URL. Currently a very
    simple function but may become more complex later.
    """
    return mainUrl + additional


def _quoteChar(c):
    oc = ord(c)
    if oc < 256:
        return u"%%%02X" % oc
    else:
        return u"@%04X" % oc


_ESCAPING_CHARACTERS = u"%@~"

_FORBIDDEN_CHARACTERS = frozenset(u":/\\*?\"'<>|;![]" + _ESCAPING_CHARACTERS)
_FORBIDDEN_START = _FORBIDDEN_CHARACTERS | frozenset(u".$ -")

# Allowed ascii characters remaining: #&()+,=[]^_`{}

def iterCompatibleFilename(baseName, suffix, asciiOnly=False, maxLength=120,
        randomLength=10):
    """
    Generator to create filenames compatible with (hopefully) all major
    OSs/filesystems.

    Encode a unicode filename to a filename compatible with (hopefully)
    any filesystem encoding by converting unicode to '%xx' for
    characters up to 250 and '@xxxx' above. Each 'x' represents a hex
    digit.

    If the resulting name is too long it is shortened.

    If the first returned filename isn't accepted, a sequence of random
    characters, delimited by a tilde '~', is added. If the filename is then
    too long it is also shortened.

    The first "random" sequence isn't random but an MD5 hash of baseName.
    Each time you ask for the next filename, a new sequence of random
    characters is created.

    baseName -- Base name to use for the filename
    suffix -- Suffix (must include the dot) of the filename. The suffix must not
            be empty, is not quoted in any way and should follow the
            rules of the filesystem(s)
    asciiOnly -- Iff True, all non-ascii characters are replaced.
    maxLength -- Maximum length of filename including encoded basename,
            random sequence and suffix
    randomLength -- Length of the random sequence (without leading tilde)
    """
    maxLength = between(20 + len(suffix) + randomLength, maxLength, 250)

    baseName = mbcsDec(baseName)[0]

    if len(baseName) > 0:
        c = baseName[0]
        if ord(c) < 32 or c in _FORBIDDEN_START or \
                (asciiOnly and ord(c) > 127):
            baseQuoted = [_quoteChar(c)]
        else:
            baseQuoted = [c]

        for c in baseName[1:]:
            if ord(c) < 32 or c in _FORBIDDEN_CHARACTERS or \
                    (asciiOnly and ord(c) > 127):
                baseQuoted.append(_quoteChar(c))
            else:
                baseQuoted.append(c)
    else:
        baseQuoted = []

    overallLength = sum(len(bq) for bq in baseQuoted) + len(suffix)

    # Shorten baseQuoted if needed. This method ensures that no half-quoted
    # character (e.g. "@3") is remaining
    while overallLength > maxLength:
        overallLength -= len(baseQuoted.pop())

    if len(baseName) > 0:
        # First try, no random part
        yield u"".join(baseQuoted) + suffix

    # Add random part to length
    overallLength += 1 + randomLength

    # Shorten baseQuoted again
    while overallLength > maxLength:
        overallLength -= len(baseQuoted.pop())

    beforeRandom = u"".join(baseQuoted) + u"~"

    # Now we try an MD5 hash. This is one last try to create a filename which
    # is unambiguously connected to the baseName
    hashStr = getMd5B36ByString(baseName)[-randomLength:]
    if len(hashStr) < randomLength:
        hashStr = u"0" * (randomLength - len(hashStr)) + hashStr

    yield beforeRandom + hashStr + suffix

    # Now build infinite random names
    while True:
        yield beforeRandom + createRandomString(randomLength) + suffix

def _unquoteCharRepl(matchObj):
    s = matchObj.group(0)

    if s[0] == "%":
        v = int(s[1:3], 16)
        return unichr(v)
    else:   # s[0] == "@":
        v = int(s[1:5], 16)
        return unichr(v)


_FILENAME_UNQUOTE_RE = _re.compile(ur"%[A-Fa-f0-9]{2}|@[A-Fa-f0-9]{4}",
        _re.UNICODE | _re.DOTALL | _re.MULTILINE)


def guessBaseNameByFilename(filename, suffix=u""):
    """
    Try to guess the basename for a particular file name created by
    iterCompatibleFilename() as far as it can be reconstructed.
    """
    # Filename may contain a path, so at first, strip it
    filename = os.path.basename(filename)

    if filename.endswith(suffix):
        filename = filename[:-len(suffix)]
    # else?

    # After a tilde begins the random part, so remove
    tildI = filename.find(u"~")
    if tildI > 0:   # tildI == 0 would mean a nameless file
        filename = filename[:tildI]

    return _FILENAME_UNQUOTE_RE.sub(_unquoteCharRepl, filename)
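
# --- Illustrative usage sketch (added; not from the original file) ---
# The first name yielded by iterCompatibleFilename() quotes forbidden
# characters; guessBaseNameByFilename() reverses that quoting:
# >>> iterCompatibleFilename(u"Wiki: Page?", u".png").next()
# u'Wiki%3A Page%3F.png'
# >>> guessBaseNameByFilename(u"Wiki%3A Page%3F.png", u".png")
# u'Wiki: Page?'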

_RNDBASESEQ = u"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

def createRandomString(length):
    """
    Create a unicode string of length random characters and digits
    """
    return u"".join([random.choice(_RNDBASESEQ) for i in range(length)])


# _RNDBASENOHEX = u"GHIJKLMNOPQRSTUVWXYZ"
#
# def createRandomStringNoHexFirst(length):
#     """
#     Create a unicode string of length random characters and digits.
#     First char. must not be a possible hexadecimal digit.
#     """
#     if length == 0:
#         return u""
#
#     return random.choice(_RNDBASENOHEX) + u"".join([random.choice(_RNDBASESEQ)
#             for i in range(length - 1)])


def getMd5B36ByString(text):
    """
    Calculate the MD5 hash of text (if unicode after conversion to utf-8)
    and return it as unistring for numeric base 36.

    Based on http://code.activestate.com/recipes/111286/
    """
    if isinstance(text, unicode):
        text = text.encode("utf-8")

    # digest = hashlib.md5(text).digest()
    #
    # # make an integer out of the number
    # x = 0L
    # for digit in digest:
    #     x = x*256 + ord(digit)

    x = int(hashlib.md5(text).hexdigest(), 16)

    # create the result in base len(_RNDBASESEQ) (=36)
    res = ""
    if x == 0:
        res = _RNDBASESEQ[0]

    while x > 0:
        digit = x % len(_RNDBASESEQ)
        res = _RNDBASESEQ[digit] + res
        x //= len(_RNDBASESEQ)

    return res


def boolToChar(b):
    if b:
        return "1"
    else:
        return "\0"


def charToBool(c):
    return c != "\0"


def boolToInt(b):
    if b:
        return 1
    else:
        return 0


def strToBin(s):
    """
    s -- String to convert to binary (NOT unicode!)
    """
    return pack(">I", len(s)) + s   # Why big-endian? Why not?


def binToStr(b):
    """
    Returns tuple (s, br) with string s and rest of the binary data br
    """
    l = unpack(">I", b[:4])[0]
    s = b[4 : 4+l]
    br = b[4+l : ]
    return (s, br)
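
# --- Illustrative usage sketch (added; not from the original file) ---
# strToBin() length-prefixes a bytestring so that binToStr() can peel it
# off the front of a larger buffer again:
# >>> data = strToBin("ab") + strToBin("xyz")
# >>> binToStr(data)
# ('ab', '\x00\x00\x00\x03xyz')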

# def orderBySuggestion(strs, sugg):
#     """
#     Order string iterable strs in a way that all strings also present in
#     sequence sugg come first in resulting list, then the strings from strs
#     which are not in sugg in arbitrary order.
#     """
#     s = set(strs)
#     result = []
#     for e in sugg:
#         if e in s:
#             result.append(e)
#             s.remove(e)
#
#     for e in s:
#         result.append(e)
#
#     return result


def wikiUrlToPathWordAndAnchor(url):
    """
    Split a "wiki:" protocol URL into the path of the config file,
    the name of the wikiword and the anchor to open if given in query string.
    Returns (path, wikiword, anchor) where wikiword and/or anchor may be None
    """
    # Change "wiki:" url to "http:" for urlparse
    linkHt = "http:" + url[5:]
    parsed = urlparse.urlparse(linkHt)
    # Parse query string into dictionary
    queryDict = cgi.parse_qs(parsed[4])
    # Retrieve wikiword to open if existing
    # queryDict values are lists of values therefore this expression
    wikiWordToOpen = flexibleUrlUnquote(queryDict.get("page", (None,))[0])
    anchorToOpen = flexibleUrlUnquote(queryDict.get("anchor", (None,))[0])

    # Modify parsed to create clean url by clearing query and fragment
    parsed = list(parsed)
    parsed[4] = ""
    parsed[5] = ""
    parsed = tuple(parsed)

    filePath = pathnameFromUrl(urlparse.urlunparse(parsed)[5:], False)
    # filePath = urllib.url2pathname(url)

    return (filePath, wikiWordToOpen, anchorToOpen)


def pathWordAndAnchorToWikiUrl(filePath, wikiWordToOpen, anchorToOpen):
    url = urlFromPathname(filePath)
    queryStringNeeded = (wikiWordToOpen is not None) or \
            (anchorToOpen is not None)

    result = ["wiki:", url]

    if queryStringNeeded:
        result.append("?")
        ampNeeded = False

        if wikiWordToOpen is not None:
            result.append("page=")
            result.append(urlQuote(wikiWordToOpen, safe=""))
            ampNeeded = True

        if anchorToOpen is not None:
            if ampNeeded:
                result.append("&")
            result.append("anchor=")
            result.append(urlQuote(anchorToOpen, safe=""))
            ampNeeded = True

    return "".join(result)


def joinRegexes(patternList):
    return u"(?:(?:" + u")|(?:".join(patternList) + u"))"


class SnippetCollector(object):
    """
    Collects (byte/uni)string snippets in a list. This is faster than
    using string += string.
    """
    def __init__(self):
        self.snippets = []
        self.length = 0

    def drop(self, length):
        """
        Remove last length (byte/uni)characters
        """
        assert self.length >= length

        while length > 0 and len(self.snippets) > 0:
            if length < len(self.snippets[-1]):
                self.snippets[-1] = self.snippets[-1][:-length]
                self.length -= length
                break

            if length >= len(self.snippets[-1]):
                length -= len(self.snippets[-1])
                self.length -= len(self.snippets[-1])
                del self.snippets[-1]

    def append(self, s):
        if len(s) == 0:
            return

        self.length += len(s)
        self.snippets.append(s)

    def __iadd__(self, s):
        self.append(s)
        return self

    def value(self):
        return "".join(self.snippets)

    def __len__(self):
        return self.length
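
# --- Illustrative usage sketch (added; not from the original file) ---
# Appended snippets are only joined when value() is called:
# >>> sc = SnippetCollector()
# >>> sc += u"Hello"
# >>> sc += u", world"
# >>> sc.drop(6)
# >>> sc.value()
# u'Hello,'
# >>> len(sc)
# 6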

class Conjunction:
    """
    Used to create SQL statements. Example:

        conjunction = Conjunction("where ", "and ")
        whereClause = ""
        if ...:
            whereClause += conjunction() + "word = ? "
        if ...:
            whereClause += conjunction() + "key = ? "

    will always create a valid where-clause
    """
    def __init__(self, firstpart, otherpart):
        self.firstpart = firstpart
        self.otherpart = otherpart
        self.first = True

    def __call__(self):
        if self.first:
            self.first = False
            return self.firstpart
        else:
            return self.otherpart

    def __repr__(self):
        return "<Conjunction(%s, %s) %s>" % (self.firstpart, self.otherpart,
                self.first)

# ---------- Handling diff information ----------


def difflibToCompact(ops, b):
    """
    Rewrite sequence of op_codes returned by difflib.SequenceMatcher.get_opcodes
    to the compact opcode format.

    0: replace, 1: delete, 2: insert

    b -- second string to match
    """
    result = []
    # ops.reverse()
    for tag, i1, i2, j1, j2 in ops:
        if tag == "equal":
            continue
        elif tag == "replace":
            result.append((0, i1, i2, b[j1:j2]))
        elif tag == "delete":
            result.append((1, i1, i2))
        elif tag == "insert":
            result.append((2, i1, b[j1:j2]))

    return result

This large file is truncated here; the full source is available in the repository linked above.