PageRenderTime 3881ms CodeModel.GetById 37ms RepoModel.GetById 4ms app.codeStats 1ms

/lang/ruby/lib/avro/schema.rb

http://github.com/thirumg/Avro.NET
Ruby | 427 lines | 355 code | 47 blank | 25 comment | 48 complexity | 68d95e7c8b2835c88173d14eef5bd24a MD5 | raw file
Possible License(s): Apache-2.0, JSON
  1. # Licensed to the Apache Software Foundation (ASF) under one
  2. # or more contributor license agreements. See the NOTICE file
  3. # distributed with this work for additional information
  4. # regarding copyright ownership. The ASF licenses this file
  5. # to you under the Apache License, Version 2.0 (the
  6. # "License"); you may not use this file except in compliance
  7. # with the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. module Avro
  17. class Schema
  # Avro type-name tables and the numeric bounds used when validating
  # int/long data. The sets are frozen so that these shared constants
  # cannot be modified by accident at runtime.
  # FIXME turn these into symbols to prevent some gc pressure
  PRIMITIVE_TYPES = Set.new(%w[null boolean string bytes int long float double]).freeze
  NAMED_TYPES = Set.new(%w[fixed enum record error]).freeze
  VALID_TYPES = (PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request])).freeze

  # Inclusive bounds of a 32-bit signed integer.
  INT_MIN_VALUE = -(1 << 31)
  INT_MAX_VALUE = (1 << 31) - 1
  # Inclusive bounds of a 64-bit signed integer.
  LONG_MIN_VALUE = -(1 << 63)
  LONG_MAX_VALUE = (1 << 63) - 1
  # Build a schema object from a JSON string. The decoded JSON is handed
  # to real_parse together with a fresh, empty names table.
  def self.parse(json_string)
    schema_data = Yajl.load(json_string)
    real_parse(schema_data, {})
  end
  # Build Avro Schema from data parsed out of JSON string.
  #
  # json_obj - decoded JSON: a Hash (schema with a "type" property), an
  #            Array (union) or a primitive type name.
  # names    - table of already defined named schemas, passed on to the
  #            schema constructors.
  #
  # Returns the schema object; raises SchemaParseError for anything that
  # is not recognised.
  def self.real_parse(json_obj, names=nil)
    unless json_obj.is_a?(Hash)
      # JSON array (union)
      return UnionSchema.new(json_obj, names) if json_obj.is_a?(Array)
      return PrimitiveSchema.new(json_obj) if PRIMITIVE_TYPES.include?(json_obj)

      raise SchemaParseError, "#{json_obj.inspect} is not a schema we know about."
    end

    type = json_obj['type']
    return PrimitiveSchema.new(type) if PRIMITIVE_TYPES.include?(type)

    if NAMED_TYPES.include?(type)
      name, namespace = json_obj['name'], json_obj['namespace']
      case type
      when 'fixed'
        return FixedSchema.new(name, namespace, json_obj['size'], names)
      when 'enum'
        return EnumSchema.new(name, namespace, json_obj['symbols'], names)
      when 'record', 'error'
        return RecordSchema.new(name, namespace, json_obj['fields'], names, type)
      else
        raise SchemaParseError, "Unknown named type: #{type}"
      end
    end

    if VALID_TYPES.include?(type)
      case type
      when 'array'
        return ArraySchema.new(json_obj['items'], names)
      when 'map'
        return MapSchema.new(json_obj['values'], names)
      else
        raise SchemaParseError, "Unknown Valid Type: #{type}"
      end
    end

    raise SchemaParseError, "No \"type\" property: #{json_obj}" if type.nil?

    raise SchemaParseError, "Undefined type: #{type}"
  end
  # Determine if a ruby datum is an instance of a schema.
  #
  # expected_schema - object responding to +type+ (an Avro type name
  #                   string) and to the accessor that type needs:
  #                   +size+, +symbols+, +items+, +values+, +schemas+
  #                   or +fields+.
  # datum           - the ruby value to check.
  #
  # Returns true or false. Raises RuntimeError when the schema's type is
  # not one of the names handled below.
  def self.validate(expected_schema, datum)
    case expected_schema.type
    when 'null'
      datum.nil?
    when 'boolean'
      datum == true || datum == false
    when 'string', 'bytes'
      datum.is_a?(String)
    when 'int'
      # Integer is used instead of Fixnum/Bignum, which are not defined
      # on current Rubies; it matches the same values on old ones.
      datum.is_a?(Integer) &&
        (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
    when 'long'
      datum.is_a?(Integer) &&
        (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
    when 'float', 'double'
      datum.is_a?(Float) || datum.is_a?(Integer)
    when 'fixed'
      datum.is_a?(String) && datum.size == expected_schema.size
    when 'enum'
      expected_schema.symbols.include?(datum)
    when 'array'
      datum.is_a?(Array) &&
        datum.all? { |d| validate(expected_schema.items, d) }
    when 'map'
      # Check the container first so that non-Hash data is reported as
      # invalid instead of raising NoMethodError on #keys.
      datum.is_a?(Hash) &&
        datum.keys.all? { |k| k.is_a?(String) } &&
        datum.values.all? { |v| validate(expected_schema.values, v) }
    when 'union'
      expected_schema.schemas.any? { |s| validate(s, datum) }
    when 'record', 'error', 'request'
      datum.is_a?(Hash) &&
        expected_schema.fields.all? { |f| validate(f.type, datum[f.name]) }
    else
      raise "you suck #{expected_schema.inspect} is not allowed."
    end
  end
  # Store the Avro type name of this schema.
  def initialize(type)
    @type = type
  end

  # The Avro type name given at construction time.
  def type
    @type
  end
  # Two schemas are equal when the other object is a Schema with the
  # same type name. The +seen+ argument is accepted but not used here.
  def ==(other, seen=nil)
    return false unless other.is_a?(Schema)

    @type == other.type
  end
  # Hash code derived from the type name only. The +seen+ argument is
  # accepted but not used here.
  def hash(seen=nil)
    type_name = @type
    type_name.hash
  end
  # Parse a nested schema definition through Schema.real_parse.
  # A SchemaParseError is passed through unchanged; any other rescued
  # error is replaced by a SchemaParseError that names this class and
  # shows the offending definition.
  def subparse(json_obj, names=nil)
    Schema.real_parse(json_obj, names)
  rescue SchemaParseError
    raise
  rescue
    raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
  end
  # Hash form of the schema: just its "type" property at this level.
  def to_hash
    props = {}
    props['type'] = @type
    props
  end
  # JSON text of the schema, produced by dumping to_hash.
  def to_s
    props = to_hash
    Yajl.dump(props)
  end
  # Schema that carries a name and an optional namespace. On
  # construction it registers itself in the names table through
  # Name.add_name.
  class NamedSchema < Schema
    attr_reader :name, :namespace

    def initialize(type, name, namespace=nil, names=nil)
      super(type)
      @name, @namespace = Name.extract_namespace(name, namespace)
      Name.add_name(names, self)
    end

    # Adds "name" (and "namespace" when one is set) to the parent hash.
    def to_hash
      named_props =
        if @namespace
          {'name' => @name, 'namespace' => @namespace}
        else
          {'name' => @name}
        end
      super.merge(named_props)
    end

    # Name joined with the namespace, as computed by Name.make_fullname.
    def fullname
      Name.make_fullname(@name, @namespace)
    end
  end
  # Schema with an ordered list of fields. It is built for the
  # 'record' and 'error' types by Schema.real_parse, and also accepts
  # 'request' as schema_type.
  class RecordSchema < NamedSchema
    attr_reader :fields

    # Turn the raw field definitions into Field objects.
    # Raises SchemaParseError for an entry that cannot be indexed with
    # [] or for a field name that appears twice.
    def self.make_field_objects(field_data, names)
      seen_names = Set.new
      field_data.map do |field|
        # TODO(jmhodges) wtffffff
        unless field.respond_to?(:[])
          raise SchemaParseError, "Not a valid field: #{field}"
        end

        new_field = Field.new(field['type'], field['name'],
                              field['default'], field['order'], names)
        # make sure field name has not been used yet
        unless seen_names.add?(new_field.name)
          raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
        end
        new_field
      end
    end

    def initialize(name, namespace, fields, names=nil, schema_type='record')
      if schema_type != 'request'
        super(schema_type, name, namespace, names)
      else
        # A request only records its type; the named-schema setup
        # (name extraction and registration) is skipped.
        @type = schema_type
      end
      @fields = RecordSchema.make_field_objects(fields, names)
    end

    # Fields indexed by their name.
    def fields_hash
      by_name = {}
      fields.each { |field| by_name[field.name] = field }
      by_name
    end

    # Hash form including the fields; for a request only the list of
    # field hashes is returned.
    def to_hash
      base = super
      field_hashes = @fields.map { |f| Yajl.load(f.to_s) }
      full = base.merge('fields' => field_hashes)
      return full['fields'] if type == 'request'

      full
    end
  end
  # Schema for an Avro array. +items+ holds the schema of the elements.
  # +items_schema_from_names+ is true when that schema was looked up by
  # name in the names table rather than parsed inline.
  class ArraySchema < Schema
    attr_reader :items, :items_schema_from_names

    # items - element schema definition, or the name of a schema
    #         already present in +names+.
    # names - table of known named schemas; may be nil.
    def initialize(items, names=nil)
      @items_schema_from_names = false
      super('array')
      # names defaults to nil, so guard before the lookup (the same
      # check Field#initialize performs).
      if items.is_a?(String) && names && names.has_key?(items)
        @items = names[items]
        @items_schema_from_names = true
      else
        @items = subparse(items, names)
      end
    end

    # Hash form; a named element schema is written as its fullname,
    # anything else as its decoded JSON.
    def to_hash
      name_or_json = if items_schema_from_names
                       items.fullname
                     else
                       Yajl.load(items.to_s)
                     end
      super.merge('items' => name_or_json)
    end
  end
  # Schema for an Avro map. +values+ holds the schema of the map values.
  # +values_schema_from_names+ is true when that schema was looked up by
  # name in the names table rather than parsed inline.
  class MapSchema < Schema
    attr_reader :values, :values_schema_from_names

    # values - value schema definition, or the name of a schema already
    #          present in +names+.
    # names  - table of known named schemas; may be nil.
    def initialize(values, names=nil)
      @values_schema_from_names = false
      super('map')
      # names defaults to nil, so guard before the lookup (the same
      # check Field#initialize performs).
      if values.is_a?(String) && names && names.has_key?(values)
        values_schema = names[values]
        @values_schema_from_names = true
      else
        values_schema = subparse(values, names)
      end
      @values = values_schema
    end

    # Hash form; a named value schema is written as its fullname (as
    # ArraySchema#to_hash does for items) rather than as the schema
    # object itself, anything else as its decoded JSON.
    def to_hash
      to_dump = super
      to_dump['values'] = if values_schema_from_names
                            values.fullname
                          else
                            Yajl.load(values.to_s)
                          end
      to_dump
    end
  end
  # Schema for an Avro union. +schemas+ lists the member schemas in
  # order; +schema_from_names_indices+ holds the positions of members
  # that were looked up by name in the names table.
  class UnionSchema < Schema
    attr_reader :schemas, :schema_from_names_indices

    # schemas - list of member schema definitions or names.
    # names   - table of known named schemas; may be nil.
    #
    # Raises SchemaParseError when a non-named type appears twice or
    # when a member is itself a union.
    def initialize(schemas, names=nil)
      super('union')

      schema_objects = []
      @schema_from_names_indices = []
      schemas.each_with_index do |schema, i|
        from_names = false
        # names defaults to nil, so guard before the lookup (the same
        # check Field#initialize performs).
        if schema.is_a?(String) && names && names.has_key?(schema)
          new_schema = names[schema]
          from_names = true
        else
          new_schema = subparse(schema, names)
        end

        ns_type = new_schema.type
        if VALID_TYPES.include?(ns_type) &&
            !NAMED_TYPES.include?(ns_type) &&
            schema_objects.any? { |o| o.type == ns_type }
          raise SchemaParseError, "#{ns_type} is already in Union"
        elsif ns_type == 'union'
          raise SchemaParseError, "Unions cannot contain other unions"
        else
          schema_objects << new_schema
          @schema_from_names_indices << i if from_names
        end
      end
      # Assigned after the loop so that an empty union still gets an
      # (empty) list instead of leaving @schemas nil.
      @schemas = schema_objects
    end

    # JSON text of the union: a list holding the fullname of members
    # taken from the names table and the decoded JSON of the others.
    def to_s
      # FIXME(jmhodges) this from_name pattern is really weird and
      # seems code-smelly.
      to_dump = []
      schemas.each_with_index do |schema, i|
        if schema_from_names_indices.include?(i)
          to_dump << schema.fullname
        else
          to_dump << Yajl.load(schema.to_s)
        end
      end
      Yajl.dump(to_dump)
    end
  end
  # Named schema for an Avro enum; +symbols+ is the list of allowed
  # values.
  class EnumSchema < NamedSchema
    attr_reader :symbols

    # Raises Avro::SchemaParseError when +symbols+ is not an Array or
    # contains the same symbol more than once.
    def initialize(name, space, symbols, names=nil)
      unless symbols.is_a?(Array)
        raise Avro::SchemaParseError, 'Enum Schema requires a JSON array for the symbols property.'
      end
      if symbols.uniq.length < symbols.length
        # Interpolate the inspected list: 'fmt' % array would use the
        # array as the argument list and show only its first element.
        fail_msg = "Duplicate symbol: #{symbols.inspect}"
        raise Avro::SchemaParseError, fail_msg
      end
      super('enum', name, space, names)
      @symbols = symbols
    end

    # Adds the "symbols" list to the named-schema hash.
    def to_hash
      super.merge('symbols' => symbols)
    end
  end
  # Valid primitive types are in PRIMITIVE_TYPES.
  # Constructing one with any other type name raises AvroError.
  class PrimitiveSchema < Schema
    def initialize(type)
      if PRIMITIVE_TYPES.include?(type)
        super(type)
      else
        raise AvroError, "#{type} is not a valid primitive type."
      end
    end

    # The quoted type name when the hash form holds a single entry,
    # otherwise the JSON dump of the hash form.
    def to_s
      props = to_hash
      return type.inspect if props.size == 1

      Yajl.dump(props)
    end
  end
  # Named schema for an Avro fixed type; +size+ is its integer size
  # property.
  class FixedSchema < NamedSchema
    attr_reader :size

    # Raises AvroError when +size+ is not an integer.
    def initialize(name, space, size, names=nil)
      # Ensure valid cto args
      # Integer is used instead of Fixnum/Bignum, which are not defined
      # on current Rubies; it matches the same values on old ones.
      unless size.is_a?(Integer)
        raise AvroError, 'Fixed Schema requires a valid integer for size property.'
      end
      super('fixed', name, space, names)
      @size = size
    end

    # Adds the "size" property to the named-schema hash.
    def to_hash
      super.merge('size' => @size)
    end
  end
  # One field of a record: a name, the schema of its value, and the
  # optional "default" and "order" properties. +type_from_names+ is
  # true when the schema was looked up by name in the names table.
  class Field < Schema
    attr_reader :type, :name, :default, :order, :type_from_names

    # type  - schema definition of the field value, or the name of a
    #         schema already present in +names+.
    # names - table of known named schemas; may be nil.
    def initialize(type, name, default=nil, order=nil, names=nil)
      @type_from_names = false
      if type.is_a?(String) && names && names.has_key?(type)
        type_schema = names[type]
        @type_from_names = true
      else
        type_schema = subparse(type, names)
      end
      @type = type_schema
      @name = name
      @default = default
      @order = order
    end

    # Hash form of the field. The type is written as a fullname when it
    # came from the names table, otherwise as its decoded JSON.
    def to_hash
      sigh_type = type_from_names ? type.fullname : Yajl.load(type.to_s)
      hsh = {
        'name' => name,
        'type' => sigh_type
      }
      # Only nil means "no default"; a default of false must be kept,
      # so test for nil instead of truthiness.
      hsh['default'] = default unless default.nil?
      hsh['order'] = order if order
      hsh
    end

    # JSON text of the field.
    def to_s
      Yajl.dump(to_hash)
    end
  end
  349. end
  # Error raised by the schema parsing code in this file when a schema
  # definition is rejected.
  class SchemaParseError < AvroError
  end
  # Helpers for the names and namespaces of named schemas.
  module Name
    # Split a dotted name into its last segment and the namespace in
    # front of it. A name that does not split into more than one
    # segment is returned unchanged together with the given namespace.
    def self.extract_namespace(name, namespace)
      segments = name.split('.')
      return [name, namespace] if segments.size <= 1

      [segments.last, segments[0...-1].join('.')]
    end

    # Add a new schema object to the names dictionary (in place).
    # A nil dictionary is replaced by a new one. Raises
    # SchemaParseError when the fullname is a reserved type name or is
    # already registered. Returns the dictionary.
    def self.add_name(names, new_schema)
      new_fullname = new_schema.fullname
      if Avro::Schema::VALID_TYPES.include?(new_fullname)
        raise SchemaParseError, "#{new_fullname} is a reserved type name."
      end

      names = {} if names.nil?
      if names.has_key?(new_fullname)
        raise SchemaParseError, "The name \"#{new_fullname}\" is already in use."
      end

      names[new_fullname] = new_schema
      names
    end

    # Prefix the name with the namespace, unless the name already
    # contains a dot or no namespace is given.
    def self.make_fullname(name, namespace)
      return name if name.include?('.') || namespace.nil?

      namespace + '.' + name
    end
  end
  380. end