PageRenderTime 24ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 0ms

/mailcatcher/contrib/mailparser.rb

https://gitlab.com/jontow/cpx
Ruby | 423 lines | 382 code | 25 blank | 16 comment | 20 complexity | 131e0707636b7c83ce697df8b9c53fec MD5 | raw file
  1. # $Id: mailparser.rb,v 1.18 2006/07/10 16:34:55 tommy Exp $
  2. #
  3. # Copyright (C) 2003-2006 TOMITA Masahiro
  4. # tommy@tmtm.org
  5. #
  6. =begin license
  7. ************************************************
  8. THIS FILE CONTAINS LOCAL MODIFICATIONS NOT FOUND
  9. IN THE DISTRIBUTION LINKED BELOW
  10. ************************************************
  11. Licenced under ruby's license.
  12. Obtained from http://tmtm.org/ja/ruby/mailparser/
  13. http://www.ruby-lang.org/en/LICENSE.txt
  14. http://www.ruby-lang.org/ja/LICENSE.txt
  15. =end
  16. require "nkf"
  17. require "date"
  18. module MailParser
  19. @@output_charset = "euc-jp"
  20. @@text_body_only = false
  21. @@extract_message_type = true
  22. ConvertMethods = {
  23. "JE" => :jistoeuc,
  24. "SE" => :sjistoeuc,
  25. "UE" => :utf8toeuc,
  26. "EU" => :euctoutf8,
  27. "SU" => :sjistoutf8,
  28. "JU" => :jistoutf8,
  29. }
  30. Charsets = {
  31. "iso-2022-jp" => "J",
  32. "euc-jp" => "E",
  33. "shift_jis" => "S",
  34. "sjis" => "S",
  35. "x-sjis" => "S",
  36. "utf-8" => "U",
  37. "us-ascii" => "N",
  38. }
  39. module_function
  40. def euctoutf8(s)
  41. NKF.nkf("-m0Ewx", s)
  42. end
  43. def sjistoutf8(s)
  44. NKF.nkf("-m0Swx", s)
  45. end
  46. def jistoutf8(s)
  47. NKF.nkf("-m0Jwx", s)
  48. end
  49. def sjistoeuc(s)
  50. NKF.nkf("-m0Sex", s)
  51. end
  52. def jistoeuc(s)
  53. NKF.nkf("-m0Jex", s)
  54. end
  55. def utf8toeuc(s)
  56. NKF.nkf("-m0Wex", s)
  57. end
  58. def output_charset=(c)
  59. @@output_charset = c
  60. end
  61. def text_body_only=(f)
  62. @@text_body_only = f
  63. end
  64. def extract_message_type=(f)
  65. @@extract_message_type = f
  66. end
  67. def b64_hdecode(str)
  68. str.unpack("m")[0]
  69. end
  70. def b64_decode(str)
  71. str.unpack("m")[0]
  72. end
  73. def qp_hdecode(str)
  74. str.gsub("_", " ").gsub(/=([0-9A-F][0-9A-F])/no) do $1.hex.chr end
  75. end
  76. def qp_decode(str)
  77. str.gsub(/[ \t]+$/no, "").gsub(/=\r?\n/no, "").
  78. gsub(/=([0-9A-F][0-9A-F])/no) do $1.hex.chr end
  79. end
  80. def mdecode_token(s)
  81. if s !~ /\A=\?([a-z0-9_-]+)\?(Q|B)\?([^?]+)\?=\Z/nio then
  82. s
  83. else
  84. charset, encoding, text = $1, $2, $3
  85. fc = MailParser::Charsets[charset.downcase]
  86. if fc == nil then return s end
  87. if encoding.downcase == 'q' then
  88. s2 = qp_hdecode(text)
  89. else
  90. s2 = b64_hdecode(text)
  91. end
  92. tc = @@output_charset && MailParser::Charsets[@@output_charset.downcase]
  93. if fc == "N" or tc.nil? or fc == tc then return s2 end
  94. MailParser.send(MailParser::ConvertMethods[fc+tc], s2)
  95. end
  96. end
  97. def mime_header_decode(str)
  98. return str.gsub(/\s+/no, " ").gsub(/\?=\s+=\?/no, "?==?").gsub(/=\?[a-z0-9_-]+\?(Q|B)\?[^?]+\?=/nio){mdecode_token $&}
  99. end
  100. def trunc_comment(v)
  101. ret = ""
  102. after = v
  103. while not after.empty? and after =~ /^(\\.|\"(\\.|[^\\\"])*\"|[^\\\(])*/no do
  104. ret << $&
  105. after = $'
  106. if after =~ /^\(/no then
  107. a = trunc_comment_sub(after[1..-1])
  108. if a == nil then
  109. return ret+after
  110. end
  111. after = a
  112. end
  113. if after == "\\" then
  114. break
  115. end
  116. end
  117. ret+after
  118. end
  119. def trunc_comment_sub(orig)
  120. after = orig
  121. loop do
  122. if after =~ /^(\\.|[^\\\(\)])*/no then
  123. after = $'
  124. end
  125. if after =~ /^\)/no then
  126. return after[1..-1]
  127. end
  128. if after =~ /^\(/no then
  129. after = trunc_comment_sub(after[1..-1])
  130. if after == nil then
  131. return nil
  132. end
  133. next
  134. end
  135. return nil
  136. end
  137. end
  138. def split_address(v)
  139. a = []
  140. r = ""
  141. while not v.empty? do
  142. if v =~ /^(\s+|[0-9A-Za-z\!\#\$\%\&\'\*\+\-\/\=\?\^\_\`\{\|\}\~]+|\"(\\.|[^\\\"])*\")/ then
  143. r << $&
  144. v = $'
  145. elsif v[0] == ?, then
  146. a << r.strip
  147. r = ""
  148. v.slice!(0,1)
  149. else
  150. r << v.slice!(0,1)
  151. end
  152. end
  153. a << r.strip
  154. return a
  155. end
  156. def get_mail_address(v)
  157. v = trunc_comment(v)
  158. a = split_address(v)
  159. return a.map{|i| i.strip =~ /<([^<>]*)>$/ ? $1 : i.strip}
  160. end
  161. def get_date(s)
  162. if s =~ /^[A-Z][A-Z][A-Z]\s*,\s*/i then
  163. s = $'
  164. end
  165. d = DateTime._strptime(s, "%d %b %Y %X")
  166. return unless d
  167. Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec]) rescue nil
  168. end
  169. def parse_content_type(str)
  170. hash = {}
  171. hash[:parameter] = {}
  172. if str.strip =~ /^([a-z0-9_-]+)(?:\/([a-z0-9_-]+))?\s*/nio then
  173. hash[:type] = $1.downcase
  174. hash[:subtype] = $2.downcase if $2
  175. params = $' #'
  176. pending = {}
  177. pending_ext = {}
  178. while true do
  179. if params =~ /;\s*([a-z0-9_-]+)(?:\*(\d+))?\s*=\s*(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
  180. pn, ord, pv = $1, $2, $3||$4
  181. params = $'
  182. if ord then
  183. pending[pn] = [] unless pending.key? pn
  184. pending[pn] << [ord.to_i, pv]
  185. else
  186. hash[:parameter][pn.downcase] = pv
  187. end
  188. elsif params =~ /;\s*([a-z0-9_-]+)\*\s*=\s*([a-z0-9_-]+)?\'(?:[a-z0-9_-]+)?\'(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
  189. pn, charset, pv = $1, $2, $3||$4
  190. params = $'
  191. pending_ext[pn] = [[0, pv, charset]]
  192. elsif params =~ /;\s*([a-z0-9_-]+)\*0\*\s*=\s*([a-z0-9_-]+)?\'(?:[a-z0-9_-]+)?\'(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
  193. pn, charset, pv = $1, $2, $3||$4
  194. params = $'
  195. pending_ext[pn] = [] unless pending_ext.key? pn
  196. pending_ext[pn] = [[0, pv, charset]]
  197. elsif params =~ /;\s*([a-z0-9_-]+)\*(\d+)\*\s*=\s*(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
  198. pn, ord, pv = $1, $2, $3||$4
  199. params = $'
  200. pending_ext[pn] = [] unless pending_ext.key? pn
  201. pending_ext[pn] << [ord.to_i, pv]
  202. else
  203. break
  204. end
  205. end
  206. pending.each do |pn, pv|
  207. hash[:parameter][pn.downcase] = pv.sort{|a,b| a[0]<=>b[0]}.map{|a|a[1]}.join
  208. end
  209. pending_ext.each do |pn, pv|
  210. pv = pv.sort{|a,b| a[0]<=>b[0]}
  211. charset = pv[0][2]
  212. v = pv.map{|a|a[1].gsub(/%([0-9A-F][0-9A-F])/){$1.hex.chr}}.join
  213. fc = MailParser::Charsets[charset.downcase] if charset
  214. tc = @@output_charset && MailParser::Charsets[@@output_charset.downcase]
  215. if fc and fc != "N" and fc != tc then
  216. v = MailParser.send(MailParser::ConvertMethods[fc+tc], v)
  217. end
  218. hash[:parameter][pn.downcase] = v
  219. end
  220. end
  221. return hash
  222. end
  223. def parse_content_disposition(str)
  224. return parse_content_type(str)
  225. end
  226. def parse_message(msg)
  227. class << msg
  228. def _each_with_multiple_delimiter(delim=[])
  229. @found_boundary = false
  230. loop do
  231. @l = gets
  232. if @l == nil then
  233. return
  234. end
  235. ll = @l.chomp
  236. if delim.include? ll then
  237. @found_boundary = true
  238. return
  239. end
  240. yield @l
  241. end
  242. end
  243. def last_line()
  244. @l && @l.chomp
  245. end
  246. attr_reader :found_boundary
  247. end
  248. m = parse_message2(msg)
  249. class << m
  250. def to_s()
  251. return <<EOS
  252. From: #{self[:from].join(",")}
  253. To: #{self[:to].join(",")}
  254. Subject:#{self[:subject]}
  255. Date: #{self[:date]}
  256. #{self[:body]}
  257. #{if self[:parts] then self[:parts].map{|p| "[#{p[:type]}/#{p[:subtype]}]<#{p[:filename]}>"}.join("\n") end}
  258. EOS
  259. end
  260. end
  261. return m
  262. end
  263. def parse_message2(msg, boundary=[])
  264. ret = parse_header(msg, boundary)
  265. return ret if msg.found_boundary
  266. if ret[:type] == "message" and @@extract_message_type then
  267. m = parse_message2(msg, boundary)
  268. ret[:message] = m
  269. elsif ret[:multipart] and ret[:boundary] then
  270. parts = []
  271. b = ret[:boundary]
  272. bd = boundary + ["--"+b+"--", "--"+b]
  273. msg._each_with_multiple_delimiter(bd) do end # skip preamble
  274. while msg.last_line == bd[-1] do
  275. m = parse_message2(msg, bd)
  276. parts << m
  277. end
  278. if msg.last_line == bd[-2] then
  279. msg._each_with_multiple_delimiter(boundary) do end
  280. end
  281. ret[:parts] = parts
  282. else
  283. if not @@text_body_only or ret[:type] == "text" or ret[:type].nil? then
  284. body = ""
  285. msg._each_with_multiple_delimiter(boundary) do |l|
  286. body << l
  287. end
  288. ret[:body] = decode_body(body, ret[:encoding], ret[:charset])
  289. else
  290. msg._each_with_multiple_delimiter(boundary) do end
  291. end
  292. end
  293. return ret
  294. end
  295. def parse_header(msg, boundary=[])
  296. ret = {}
  297. raw = ""
  298. header = []
  299. msg._each_with_multiple_delimiter(boundary) do |l|
  300. l.chomp!
  301. break if l.empty?
  302. raw << l+"\n"
  303. if l =~ /^\s/no and not header.empty? then
  304. header[-1] << l
  305. elsif not l.include? ":"
  306. next # skip garbage
  307. else
  308. header << l
  309. end
  310. end
  311. from = []
  312. to = []
  313. cc = []
  314. date = nil
  315. subject = ""
  316. encoding = ct = charset = multipart = body = filename = bd = nil
  317. h = {}
  318. header.each do |str|
  319. hn, hb = str.split(/:\s*/no, 2)
  320. hn.downcase!
  321. h[hn] = [] unless h.key? hn
  322. h[hn] << mime_header_decode(hb)
  323. case hn.downcase
  324. when "from"
  325. from.concat get_mail_address(hb)
  326. when "to"
  327. to.concat get_mail_address(hb)
  328. when "cc"
  329. cc.concat get_mail_address(hb)
  330. when "date"
  331. date = get_date(hb)
  332. when "subject"
  333. subject.concat hb
  334. when "content-type"
  335. ct = parse_content_type(hb)
  336. if ct[:type] == "text" then
  337. charset = ct[:parameter]["charset"]
  338. elsif ct[:type] == "multipart" then
  339. multipart = true
  340. bd = ct[:parameter]["boundary"]
  341. end
  342. filename = mime_header_decode(ct[:parameter]["name"]) if ct[:parameter]["name"]
  343. when "content-disposition"
  344. cd = parse_content_disposition(hb)
  345. filename = mime_header_decode(cd[:parameter]["filename"]) if cd[:parameter]["filename"]
  346. when "content-transfer-encoding"
  347. encoding = hb.strip.downcase
  348. end
  349. end
  350. ret[:from] = from
  351. ret[:to] = to
  352. ret[:cc] = cc
  353. ret[:date] = date
  354. ret[:subject] = mime_header_decode subject
  355. if ct then
  356. ret[:type] = ct[:type].downcase if ct[:type]
  357. ret[:subtype] = ct[:subtype].downcase if ct[:subtype]
  358. ret[:charset] = charset.downcase if charset
  359. end
  360. ret[:encoding] = encoding if encoding
  361. ret[:multipart] = multipart
  362. ret[:boundary] = bd
  363. ret[:filename] = filename if filename
  364. ret[:header] = h
  365. ret[:rawheader] = raw
  366. return ret
  367. end
  368. def decode_body(body, encoding, charset)
  369. case encoding
  370. when "base64"
  371. body = b64_decode body
  372. when "quoted-printable"
  373. body = qp_decode body
  374. end
  375. if charset == nil then return body end
  376. fc = MailParser::Charsets[charset.downcase]
  377. if fc == nil then return body end
  378. tc = @@output_charset && MailParser::Charsets[@@output_charset.downcase]
  379. if fc == "N" or tc.nil? or fc == tc then return body end
  380. MailParser.send(MailParser::ConvertMethods[fc+tc], body)
  381. end
  382. end