/Lib/uuid.py

http://unladen-swallow.googlecode.com/ · Python · 544 lines · 475 code · 28 blank · 41 comment · 21 complexity · 41b61a8c58710c5448e5197a8257a56f MD5 · raw file

  1. r"""UUID objects (universally unique identifiers) according to RFC 4122.
  2. This module provides immutable UUID objects (class UUID) and the functions
  3. uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5
  4. UUIDs as specified in RFC 4122.
  5. If all you want is a unique ID, you should probably call uuid1() or uuid4().
  6. Note that uuid1() may compromise privacy since it creates a UUID containing
  7. the computer's network address. uuid4() creates a random UUID.
  8. Typical usage:
  9. >>> import uuid
  10. # make a UUID based on the host ID and current time
  11. >>> uuid.uuid1()
  12. UUID('a8098c1a-f86e-11da-bd1a-00112444be1e')
  13. # make a UUID using an MD5 hash of a namespace UUID and a name
  14. >>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org')
  15. UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e')
  16. # make a random UUID
  17. >>> uuid.uuid4()
  18. UUID('16fd2706-8baf-433b-82eb-8c7fada847da')
  19. # make a UUID using a SHA-1 hash of a namespace UUID and a name
  20. >>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org')
  21. UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d')
  22. # make a UUID from a string of hex digits (braces and hyphens ignored)
  23. >>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}')
  24. # convert a UUID to a string of hex digits in standard form
  25. >>> str(x)
  26. '00010203-0405-0607-0809-0a0b0c0d0e0f'
  27. # get the raw 16 bytes of the UUID
  28. >>> x.bytes
  29. '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
  30. # make a UUID from a 16-byte string
  31. >>> uuid.UUID(bytes=x.bytes)
  32. UUID('00010203-0405-0607-0809-0a0b0c0d0e0f')
  33. """
  34. __author__ = 'Ka-Ping Yee <ping@zesty.ca>'
  35. RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, RESERVED_FUTURE = [
  36. 'reserved for NCS compatibility', 'specified in RFC 4122',
  37. 'reserved for Microsoft compatibility', 'reserved for future definition']
  38. class UUID(object):
  39. """Instances of the UUID class represent UUIDs as specified in RFC 4122.
  40. UUID objects are immutable, hashable, and usable as dictionary keys.
  41. Converting a UUID to a string with str() yields something in the form
  42. '12345678-1234-1234-1234-123456789abc'. The UUID constructor accepts
  43. five possible forms: a similar string of hexadecimal digits, or a tuple
  44. of six integer fields (with 32-bit, 16-bit, 16-bit, 8-bit, 8-bit, and
  45. 48-bit values respectively) as an argument named 'fields', or a string
  46. of 16 bytes (with all the integer fields in big-endian order) as an
  47. argument named 'bytes', or a string of 16 bytes (with the first three
  48. fields in little-endian order) as an argument named 'bytes_le', or a
  49. single 128-bit integer as an argument named 'int'.
  50. UUIDs have these read-only attributes:
  51. bytes the UUID as a 16-byte string (containing the six
  52. integer fields in big-endian byte order)
  53. bytes_le the UUID as a 16-byte string (with time_low, time_mid,
  54. and time_hi_version in little-endian byte order)
  55. fields a tuple of the six integer fields of the UUID,
  56. which are also available as six individual attributes
  57. and two derived attributes:
  58. time_low the first 32 bits of the UUID
  59. time_mid the next 16 bits of the UUID
  60. time_hi_version the next 16 bits of the UUID
  61. clock_seq_hi_variant the next 8 bits of the UUID
  62. clock_seq_low the next 8 bits of the UUID
  63. node the last 48 bits of the UUID
  64. time the 60-bit timestamp
  65. clock_seq the 14-bit sequence number
  66. hex the UUID as a 32-character hexadecimal string
  67. int the UUID as a 128-bit integer
  68. urn the UUID as a URN as specified in RFC 4122
  69. variant the UUID variant (one of the constants RESERVED_NCS,
  70. RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE)
  71. version the UUID version number (1 through 5, meaningful only
  72. when the variant is RFC_4122)
  73. """
  74. def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
  75. int=None, version=None):
  76. r"""Create a UUID from either a string of 32 hexadecimal digits,
  77. a string of 16 bytes as the 'bytes' argument, a string of 16 bytes
  78. in little-endian order as the 'bytes_le' argument, a tuple of six
  79. integers (32-bit time_low, 16-bit time_mid, 16-bit time_hi_version,
  80. 8-bit clock_seq_hi_variant, 8-bit clock_seq_low, 48-bit node) as
  81. the 'fields' argument, or a single 128-bit integer as the 'int'
  82. argument. When a string of hex digits is given, curly braces,
  83. hyphens, and a URN prefix are all optional. For example, these
  84. expressions all yield the same UUID:
  85. UUID('{12345678-1234-5678-1234-567812345678}')
  86. UUID('12345678123456781234567812345678')
  87. UUID('urn:uuid:12345678-1234-5678-1234-567812345678')
  88. UUID(bytes='\x12\x34\x56\x78'*4)
  89. UUID(bytes_le='\x78\x56\x34\x12\x34\x12\x78\x56' +
  90. '\x12\x34\x56\x78\x12\x34\x56\x78')
  91. UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678))
  92. UUID(int=0x12345678123456781234567812345678)
  93. Exactly one of 'hex', 'bytes', 'bytes_le', 'fields', or 'int' must
  94. be given. The 'version' argument is optional; if given, the resulting
  95. UUID will have its variant and version set according to RFC 4122,
  96. overriding the given 'hex', 'bytes', 'bytes_le', 'fields', or 'int'.
  97. """
  98. if [hex, bytes, bytes_le, fields, int].count(None) != 4:
  99. raise TypeError('need one of hex, bytes, bytes_le, fields, or int')
  100. if hex is not None:
  101. hex = hex.replace('urn:', '').replace('uuid:', '')
  102. hex = hex.strip('{}').replace('-', '')
  103. if len(hex) != 32:
  104. raise ValueError('badly formed hexadecimal UUID string')
  105. int = long(hex, 16)
  106. if bytes_le is not None:
  107. if len(bytes_le) != 16:
  108. raise ValueError('bytes_le is not a 16-char string')
  109. bytes = (bytes_le[3] + bytes_le[2] + bytes_le[1] + bytes_le[0] +
  110. bytes_le[5] + bytes_le[4] + bytes_le[7] + bytes_le[6] +
  111. bytes_le[8:])
  112. if bytes is not None:
  113. if len(bytes) != 16:
  114. raise ValueError('bytes is not a 16-char string')
  115. int = long(('%02x'*16) % tuple(map(ord, bytes)), 16)
  116. if fields is not None:
  117. if len(fields) != 6:
  118. raise ValueError('fields is not a 6-tuple')
  119. (time_low, time_mid, time_hi_version,
  120. clock_seq_hi_variant, clock_seq_low, node) = fields
  121. if not 0 <= time_low < 1<<32L:
  122. raise ValueError('field 1 out of range (need a 32-bit value)')
  123. if not 0 <= time_mid < 1<<16L:
  124. raise ValueError('field 2 out of range (need a 16-bit value)')
  125. if not 0 <= time_hi_version < 1<<16L:
  126. raise ValueError('field 3 out of range (need a 16-bit value)')
  127. if not 0 <= clock_seq_hi_variant < 1<<8L:
  128. raise ValueError('field 4 out of range (need an 8-bit value)')
  129. if not 0 <= clock_seq_low < 1<<8L:
  130. raise ValueError('field 5 out of range (need an 8-bit value)')
  131. if not 0 <= node < 1<<48L:
  132. raise ValueError('field 6 out of range (need a 48-bit value)')
  133. clock_seq = (clock_seq_hi_variant << 8L) | clock_seq_low
  134. int = ((time_low << 96L) | (time_mid << 80L) |
  135. (time_hi_version << 64L) | (clock_seq << 48L) | node)
  136. if int is not None:
  137. if not 0 <= int < 1<<128L:
  138. raise ValueError('int is out of range (need a 128-bit value)')
  139. if version is not None:
  140. if not 1 <= version <= 5:
  141. raise ValueError('illegal version number')
  142. # Set the variant to RFC 4122.
  143. int &= ~(0xc000 << 48L)
  144. int |= 0x8000 << 48L
  145. # Set the version number.
  146. int &= ~(0xf000 << 64L)
  147. int |= version << 76L
  148. self.__dict__['int'] = int
  149. def __cmp__(self, other):
  150. if isinstance(other, UUID):
  151. return cmp(self.int, other.int)
  152. return NotImplemented
  153. def __hash__(self):
  154. return hash(self.int)
  155. def __int__(self):
  156. return self.int
  157. def __repr__(self):
  158. return 'UUID(%r)' % str(self)
  159. def __setattr__(self, name, value):
  160. raise TypeError('UUID objects are immutable')
  161. def __str__(self):
  162. hex = '%032x' % self.int
  163. return '%s-%s-%s-%s-%s' % (
  164. hex[:8], hex[8:12], hex[12:16], hex[16:20], hex[20:])
  165. def get_bytes(self):
  166. bytes = ''
  167. for shift in range(0, 128, 8):
  168. bytes = chr((self.int >> shift) & 0xff) + bytes
  169. return bytes
  170. bytes = property(get_bytes)
  171. def get_bytes_le(self):
  172. bytes = self.bytes
  173. return (bytes[3] + bytes[2] + bytes[1] + bytes[0] +
  174. bytes[5] + bytes[4] + bytes[7] + bytes[6] + bytes[8:])
  175. bytes_le = property(get_bytes_le)
  176. def get_fields(self):
  177. return (self.time_low, self.time_mid, self.time_hi_version,
  178. self.clock_seq_hi_variant, self.clock_seq_low, self.node)
  179. fields = property(get_fields)
  180. def get_time_low(self):
  181. return self.int >> 96L
  182. time_low = property(get_time_low)
  183. def get_time_mid(self):
  184. return (self.int >> 80L) & 0xffff
  185. time_mid = property(get_time_mid)
  186. def get_time_hi_version(self):
  187. return (self.int >> 64L) & 0xffff
  188. time_hi_version = property(get_time_hi_version)
  189. def get_clock_seq_hi_variant(self):
  190. return (self.int >> 56L) & 0xff
  191. clock_seq_hi_variant = property(get_clock_seq_hi_variant)
  192. def get_clock_seq_low(self):
  193. return (self.int >> 48L) & 0xff
  194. clock_seq_low = property(get_clock_seq_low)
  195. def get_time(self):
  196. return (((self.time_hi_version & 0x0fffL) << 48L) |
  197. (self.time_mid << 32L) | self.time_low)
  198. time = property(get_time)
  199. def get_clock_seq(self):
  200. return (((self.clock_seq_hi_variant & 0x3fL) << 8L) |
  201. self.clock_seq_low)
  202. clock_seq = property(get_clock_seq)
  203. def get_node(self):
  204. return self.int & 0xffffffffffff
  205. node = property(get_node)
  206. def get_hex(self):
  207. return '%032x' % self.int
  208. hex = property(get_hex)
  209. def get_urn(self):
  210. return 'urn:uuid:' + str(self)
  211. urn = property(get_urn)
  212. def get_variant(self):
  213. if not self.int & (0x8000 << 48L):
  214. return RESERVED_NCS
  215. elif not self.int & (0x4000 << 48L):
  216. return RFC_4122
  217. elif not self.int & (0x2000 << 48L):
  218. return RESERVED_MICROSOFT
  219. else:
  220. return RESERVED_FUTURE
  221. variant = property(get_variant)
  222. def get_version(self):
  223. # The version bits are only meaningful for RFC 4122 UUIDs.
  224. if self.variant == RFC_4122:
  225. return int((self.int >> 76L) & 0xf)
  226. version = property(get_version)
  227. def _find_mac(command, args, hw_identifiers, get_index):
  228. import os
  229. for dir in ['', '/sbin/', '/usr/sbin']:
  230. executable = os.path.join(dir, command)
  231. if not os.path.exists(executable):
  232. continue
  233. try:
  234. # LC_ALL to get English output, 2>/dev/null to
  235. # prevent output on stderr
  236. cmd = 'LC_ALL=C %s %s 2>/dev/null' % (executable, args)
  237. pipe = os.popen(cmd)
  238. except IOError:
  239. continue
  240. for line in pipe:
  241. words = line.lower().split()
  242. for i in range(len(words)):
  243. if words[i] in hw_identifiers:
  244. return int(words[get_index(i)].replace(':', ''), 16)
  245. return None
  246. def _ifconfig_getnode():
  247. """Get the hardware address on Unix by running ifconfig."""
  248. # This works on Linux ('' or '-a'), Tru64 ('-av'), but not all Unixes.
  249. for args in ('', '-a', '-av'):
  250. mac = _find_mac('ifconfig', args, ['hwaddr', 'ether'], lambda i: i+1)
  251. if mac:
  252. return mac
  253. import socket
  254. ip_addr = socket.gethostbyname(socket.gethostname())
  255. # Try getting the MAC addr from arp based on our IP address (Solaris).
  256. mac = _find_mac('arp', '-an', [ip_addr], lambda i: -1)
  257. if mac:
  258. return mac
  259. # This might work on HP-UX.
  260. mac = _find_mac('lanscan', '-ai', ['lan0'], lambda i: 0)
  261. if mac:
  262. return mac
  263. return None
  264. def _ipconfig_getnode():
  265. """Get the hardware address on Windows by running ipconfig.exe."""
  266. import os, re
  267. dirs = ['', r'c:\windows\system32', r'c:\winnt\system32']
  268. try:
  269. import ctypes
  270. buffer = ctypes.create_string_buffer(300)
  271. ctypes.windll.kernel32.GetSystemDirectoryA(buffer, 300)
  272. dirs.insert(0, buffer.value.decode('mbcs'))
  273. except:
  274. pass
  275. for dir in dirs:
  276. try:
  277. pipe = os.popen(os.path.join(dir, 'ipconfig') + ' /all')
  278. except IOError:
  279. continue
  280. for line in pipe:
  281. value = line.split(':')[-1].strip().lower()
  282. if re.match('([0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]', value):
  283. return int(value.replace('-', ''), 16)
  284. def _netbios_getnode():
  285. """Get the hardware address on Windows using NetBIOS calls.
  286. See http://support.microsoft.com/kb/118623 for details."""
  287. import win32wnet, netbios
  288. ncb = netbios.NCB()
  289. ncb.Command = netbios.NCBENUM
  290. ncb.Buffer = adapters = netbios.LANA_ENUM()
  291. adapters._pack()
  292. if win32wnet.Netbios(ncb) != 0:
  293. return
  294. adapters._unpack()
  295. for i in range(adapters.length):
  296. ncb.Reset()
  297. ncb.Command = netbios.NCBRESET
  298. ncb.Lana_num = ord(adapters.lana[i])
  299. if win32wnet.Netbios(ncb) != 0:
  300. continue
  301. ncb.Reset()
  302. ncb.Command = netbios.NCBASTAT
  303. ncb.Lana_num = ord(adapters.lana[i])
  304. ncb.Callname = '*'.ljust(16)
  305. ncb.Buffer = status = netbios.ADAPTER_STATUS()
  306. if win32wnet.Netbios(ncb) != 0:
  307. continue
  308. status._unpack()
  309. bytes = map(ord, status.adapter_address)
  310. return ((bytes[0]<<40L) + (bytes[1]<<32L) + (bytes[2]<<24L) +
  311. (bytes[3]<<16L) + (bytes[4]<<8L) + bytes[5])
  312. # Thanks to Thomas Heller for ctypes and for his help with its use here.
  313. # If ctypes is available, use it to find system routines for UUID generation.
  314. _uuid_generate_random = _uuid_generate_time = _UuidCreate = None
  315. try:
  316. import ctypes, ctypes.util
  317. # The uuid_generate_* routines are provided by libuuid on at least
  318. # Linux and FreeBSD, and provided by libc on Mac OS X.
  319. for libname in ['uuid', 'c']:
  320. try:
  321. lib = ctypes.CDLL(ctypes.util.find_library(libname))
  322. except:
  323. continue
  324. if hasattr(lib, 'uuid_generate_random'):
  325. _uuid_generate_random = lib.uuid_generate_random
  326. if hasattr(lib, 'uuid_generate_time'):
  327. _uuid_generate_time = lib.uuid_generate_time
  328. # On Windows prior to 2000, UuidCreate gives a UUID containing the
  329. # hardware address. On Windows 2000 and later, UuidCreate makes a
  330. # random UUID and UuidCreateSequential gives a UUID containing the
  331. # hardware address. These routines are provided by the RPC runtime.
  332. # NOTE: at least on Tim's WinXP Pro SP2 desktop box, while the last
  333. # 6 bytes returned by UuidCreateSequential are fixed, they don't appear
  334. # to bear any relationship to the MAC address of any network device
  335. # on the box.
  336. try:
  337. lib = ctypes.windll.rpcrt4
  338. except:
  339. lib = None
  340. _UuidCreate = getattr(lib, 'UuidCreateSequential',
  341. getattr(lib, 'UuidCreate', None))
  342. except:
  343. pass
  344. def _unixdll_getnode():
  345. """Get the hardware address on Unix using ctypes."""
  346. _buffer = ctypes.create_string_buffer(16)
  347. _uuid_generate_time(_buffer)
  348. return UUID(bytes=_buffer.raw).node
  349. def _windll_getnode():
  350. """Get the hardware address on Windows using ctypes."""
  351. _buffer = ctypes.create_string_buffer(16)
  352. if _UuidCreate(_buffer) == 0:
  353. return UUID(bytes=_buffer.raw).node
  354. def _random_getnode():
  355. """Get a random node ID, with eighth bit set as suggested by RFC 4122."""
  356. import random
  357. return random.randrange(0, 1<<48L) | 0x010000000000L
  358. _node = None
  359. def getnode():
  360. """Get the hardware address as a 48-bit positive integer.
  361. The first time this runs, it may launch a separate program, which could
  362. be quite slow. If all attempts to obtain the hardware address fail, we
  363. choose a random 48-bit number with its eighth bit set to 1 as recommended
  364. in RFC 4122.
  365. """
  366. global _node
  367. if _node is not None:
  368. return _node
  369. import sys
  370. if sys.platform == 'win32':
  371. getters = [_windll_getnode, _netbios_getnode, _ipconfig_getnode]
  372. else:
  373. getters = [_unixdll_getnode, _ifconfig_getnode]
  374. for getter in getters + [_random_getnode]:
  375. try:
  376. _node = getter()
  377. except:
  378. continue
  379. if _node is not None:
  380. return _node
  381. _last_timestamp = None
  382. def uuid1(node=None, clock_seq=None):
  383. """Generate a UUID from a host ID, sequence number, and the current time.
  384. If 'node' is not given, getnode() is used to obtain the hardware
  385. address. If 'clock_seq' is given, it is used as the sequence number;
  386. otherwise a random 14-bit sequence number is chosen."""
  387. # When the system provides a version-1 UUID generator, use it (but don't
  388. # use UuidCreate here because its UUIDs don't conform to RFC 4122).
  389. if _uuid_generate_time and node is clock_seq is None:
  390. _buffer = ctypes.create_string_buffer(16)
  391. _uuid_generate_time(_buffer)
  392. return UUID(bytes=_buffer.raw)
  393. global _last_timestamp
  394. import time
  395. nanoseconds = int(time.time() * 1e9)
  396. # 0x01b21dd213814000 is the number of 100-ns intervals between the
  397. # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
  398. timestamp = int(nanoseconds/100) + 0x01b21dd213814000L
  399. if timestamp <= _last_timestamp:
  400. timestamp = _last_timestamp + 1
  401. _last_timestamp = timestamp
  402. if clock_seq is None:
  403. import random
  404. clock_seq = random.randrange(1<<14L) # instead of stable storage
  405. time_low = timestamp & 0xffffffffL
  406. time_mid = (timestamp >> 32L) & 0xffffL
  407. time_hi_version = (timestamp >> 48L) & 0x0fffL
  408. clock_seq_low = clock_seq & 0xffL
  409. clock_seq_hi_variant = (clock_seq >> 8L) & 0x3fL
  410. if node is None:
  411. node = getnode()
  412. return UUID(fields=(time_low, time_mid, time_hi_version,
  413. clock_seq_hi_variant, clock_seq_low, node), version=1)
  414. def uuid3(namespace, name):
  415. """Generate a UUID from the MD5 hash of a namespace UUID and a name."""
  416. from hashlib import md5
  417. hash = md5(namespace.bytes + name).digest()
  418. return UUID(bytes=hash[:16], version=3)
  419. def uuid4():
  420. """Generate a random UUID."""
  421. # When the system provides a version-4 UUID generator, use it.
  422. if _uuid_generate_random:
  423. _buffer = ctypes.create_string_buffer(16)
  424. _uuid_generate_random(_buffer)
  425. return UUID(bytes=_buffer.raw)
  426. # Otherwise, get randomness from urandom or the 'random' module.
  427. try:
  428. import os
  429. return UUID(bytes=os.urandom(16), version=4)
  430. except:
  431. import random
  432. bytes = [chr(random.randrange(256)) for i in range(16)]
  433. return UUID(bytes=bytes, version=4)
  434. def uuid5(namespace, name):
  435. """Generate a UUID from the SHA-1 hash of a namespace UUID and a name."""
  436. from hashlib import sha1
  437. hash = sha1(namespace.bytes + name).digest()
  438. return UUID(bytes=hash[:16], version=5)
  439. # The following standard UUIDs are for use with uuid3() or uuid5().
  440. NAMESPACE_DNS = UUID('6ba7b810-9dad-11d1-80b4-00c04fd430c8')
  441. NAMESPACE_URL = UUID('6ba7b811-9dad-11d1-80b4-00c04fd430c8')
  442. NAMESPACE_OID = UUID('6ba7b812-9dad-11d1-80b4-00c04fd430c8')
  443. NAMESPACE_X500 = UUID('6ba7b814-9dad-11d1-80b4-00c04fd430c8')