PageRenderTime 41ms CodeModel.GetById 10ms RepoModel.GetById 1ms app.codeStats 0ms

/mio/libraries/CocoaHttpServer/Mime/MultipartFormDataParser.m

https://gitlab.com/base.io/mio
Objective C | 523 lines | 368 code | 83 blank | 72 comment | 110 complexity | d2dfe55359b68c88f9fa66c1da502b45 MD5 | raw file
  1. #import "MultipartFormDataParser.h"
  2. #import "DDData.h"
  3. #import "HTTPLogging.h"
  4. //-----------------------------------------------------------------
  5. #pragma mark log level
  // File-local log level read by the HTTPLog* macros used below.
  // Debug and release builds are kept as separate branches so that either one can be
  // tuned independently; at the moment both select the WARN level.
  #if defined(DEBUG)
  static const int httpLogLevel = HTTP_LOG_LEVEL_WARN;
  #else
  static const int httpLogLevel = HTTP_LOG_LEVEL_WARN;
  #endif
  11. //-----------------------------------------------------------------
  12. // interface MultipartFormDataParser (private)
  13. //-----------------------------------------------------------------
  // Helpers used only inside this file. All of them are implemented in the main
  // @implementation block below.
  @interface MultipartFormDataParser (private)

  // Decodes `data` according to `encoding` (one of the contentTransferEncoding_* values);
  // encodings other than base64 and quoted-printable return `data` unchanged.
  + (NSData*) decodedDataFromData:(NSData*) data encoding:(int) encoding;

  // Decodes quoted-printable `data`. Called by decodedDataFromData:encoding:, so it is
  // declared here together with the other private helpers.
  + (NSData*) decodedDataFromQuotedPrintableData:(NSData*) data;

  // Offset of the CRLFCRLF sequence that terminates a part header, searching from
  // `offset`; -1 if it is not in `workingData` yet.
  - (int) findHeaderEnd:(NSData*) workingData fromOffset:(int) offset;

  // Offset of the next "\r\n--boundary" delimiter in `data`, searching from `offset`;
  // -1 if it is not found.
  - (int) findContentEnd:(NSData*) data fromOffset:(int) offset;

  // Number of trailing bytes (out of the first `length` bytes of `data`) to hold back so
  // that an encoded unit is not split between two delivered chunks.
  - (int) numberOfBytesToLeavePendingWithData:(NSData*) data length:(int) length encoding:(int) encoding;

  // Offset just past the first CRLF found at or after `offset`; -1 if there is none.
  - (int) offsetTillNewlineSinceOffset:(int) offset inData:(NSData*) data;

  // Searches for the first boundary (the one without a leading CRLF). Returns the offset
  // just past it, or -1 when more data is needed.
  - (int) processPreamble:(NSData*) workingData;

  @end
  22. //-----------------------------------------------------------------
  23. // implementation MultipartFormDataParser
  24. //-----------------------------------------------------------------
  25. @implementation MultipartFormDataParser
  26. @synthesize delegate,formEncoding;
  /**
   * Creates a parser for a multipart body whose parts are separated by `boundary`.
   *
   * @param boundary       The boundary string, without the leading "--".
   * @param _formEncoding  String encoding handed to MultipartMessageHeader when part
   *                       headers are parsed.
   * @return The parser, or nil when `boundary` is nil or cannot be represented in ASCII.
   */
  - (id) initWithBoundary:(NSString*) boundary formEncoding:(NSStringEncoding) _formEncoding {
      self = [super init];
      if( nil == self ) {
          return nil;
      }
      if( nil == boundary ) {
          HTTPLogWarn(@"MultipartFormDataParser: init with zero boundary");
          return nil;
      }

      // Every delimiter except the very first one is preceded by CRLF, so the searched
      // pattern is stored as "\r\n--<boundary>".
      boundaryData = [[@"\r\n--" stringByAppendingString:boundary] dataUsingEncoding:NSASCIIStringEncoding];
      if( nil == boundaryData ) {
          // dataUsingEncoding: yields nil when the string has characters outside ASCII.
          // Without boundary bytes appendData: could only fail, so refuse to build the parser.
          HTTPLogWarn(@"MultipartFormDataParser: init with a boundary that is not representable in ASCII");
          return nil;
      }

      pendingData = [[NSMutableData alloc] init];
      currentEncoding = contentTransferEncoding_binary;
      currentHeader = nil;
      formEncoding = _formEncoding;

      // Parser state flags: nothing has been consumed yet.
      reachedEpilogue = NO;
      processedPreamble = NO;
      checkForContentEnd = NO;
      waitingForCRLF = NO;
      return self;
  }
  // Replaces pendingData with a copy of the bytes of `source` from `offset` to its end,
  // or empties it when nothing is left. The copy is made before pendingData is modified,
  // so `source` may be pendingData itself.
  - (void) stashUnhandledBytesOf:(NSData*) source fromOffset:(int) offset {
      int sourceLength = (int) source.length;
      if( offset < 0 ) {
          offset = 0;
      }
      if( offset >= sourceLength ) {
          [pendingData setLength:0];
          return;
      }
      NSData* rest = [NSData dataWithBytes:(const char*) source.bytes + offset length:sourceLength - offset];
      [pendingData setData:rest];
  }

  /**
   * Feeds the next chunk of the multipart body to the parser.
   *
   * The chunk is appended to whatever was left unhandled by the previous call and the
   * result is walked through these states: preamble, boundary line, part header, part
   * content, epilogue. The delegate is told about each piece through its optional
   * process... methods. Bytes that cannot be interpreted yet (for example a possibly
   * split boundary) are kept in pendingData for the next call.
   *
   * NSData objects handed to the delegate are created with dataWithBytesNoCopy and point
   * into the parser's working buffer; they are only valid during the delegate call.
   *
   * @param data  The next chunk of the body.
   * @return NO when the parser has no boundary or a part header could not be parsed,
   *         YES otherwise (including "need more data").
   */
  - (BOOL) appendData:(NSData *)data {
      // Can't parse without boundary.
      if( nil == boundaryData ) {
          HTTPLogError(@"MultipartFormDataParser: Trying to parse multipart without specifying a valid boundary");
          assert(false);
          return NO;
      }

      // Parse the leftovers of the previous call together with the new chunk.
      NSData* workingData = data;
      if( pendingData.length ) {
          [pendingData appendData:data];
          workingData = pendingData;
      }

      // `offset` is the position of the first unhandled byte in workingData. Before
      // returning, everything from `offset` on that is still needed is moved to pendingData.
      int offset = 0;

      // A boundary may be split between chunks, so this many trailing bytes are never
      // reported as content until more data has arrived.
      int sizeToLeavePending = (int) boundaryData.length;

      if( !reachedEpilogue && workingData.length <= (NSUInteger) sizeToLeavePending ) {
          // Not enough data to search for a boundary; just accumulate it.
          if( !pendingData.length ) {
              [pendingData appendData:data];
          }
          if( !checkForContentEnd || pendingData.length < 2 ) {
              return YES;
          }
          // A boundary was just consumed: "--" right after it closes the whole message.
          const char* pendingBytes = (const char*) pendingData.bytes;
          if( pendingBytes[0] != '-' || pendingBytes[1] != '-' ) {
              checkForContentEnd = NO;
              waitingForCRLF = YES;
              return YES;
          }
          // we found the multipart end. all coming next is an epilogue.
          HTTPLogVerbose(@"MultipartFormDataParser: End of multipart message");
          // The closing marker is consumed here, so it must not be tested a second time
          // by the loop below.
          checkForContentEnd = NO;
          waitingForCRLF = YES;
          reachedEpilogue = YES;
          offset += 2;
      }

      for( ;; ) {
          const char* bytes = (const char*) workingData.bytes;
          int workingLength = (int) workingData.length;

          if( checkForContentEnd ) {
              // Raised after a delimiter was consumed, to find out whether it was the closing one.
              if( offset + 1 < workingLength ) {
                  if( bytes[offset] == '-' && bytes[offset + 1] == '-' ) {
                      // we found the multipart end. all coming next is an epilogue.
                      HTTPLogVerbose(@"MultipartFormDataParser: End of multipart message");
                      reachedEpilogue = YES;
                      offset += 2;
                  }
                  // Either way the rest of the delimiter line has to be skipped up to its CRLF.
                  checkForContentEnd = NO;
                  waitingForCRLF = YES;
              }
              else {
                  // Fewer than two bytes left: keep them (if any) and recheck on the next chunk.
                  [self stashUnhandledBytesOf:workingData fromOffset:offset];
                  return YES;
              }
          }

          if( waitingForCRLF ) {
              // A delimiter has been read, but the end of its line has not been seen yet.
              int lineEnd = [self offsetTillNewlineSinceOffset:offset inData:workingData];
              if( -1 == lineEnd ) {
                  // Everything seen so far belongs to the delimiter line and can be dropped,
                  // except a trailing CR whose LF may arrive with the next chunk.
                  if( offset < workingLength && bytes[workingLength - 1] == '\r' ) {
                      [pendingData setData:[NSData dataWithBytes:bytes + workingLength - 1 length:1]];
                  }
                  else {
                      [pendingData setLength:0];
                  }
                  return YES;
              }
              offset = lineEnd;
              waitingForCRLF = NO;
          }

          if( !processedPreamble ) {
              // The first boundary has to be found before the actual content begins.
              offset = [self processPreamble:workingData];
              if( -1 == offset ) {
                  // wait for more data for preamble
                  return YES;
              }
              // Loop again so that the CRLF after the boundary gets skipped.
              continue;
          }

          if( reachedEpilogue ) {
              // Everything after the closing delimiter line goes to the delegate as epilogue.
              if( [delegate respondsToSelector:@selector(processEpilogueData:)] ) {
                  int epilogueStart = offset < workingLength ? offset : workingLength;
                  NSData* epilogueData = [NSData dataWithBytesNoCopy:(void*) (bytes + epilogueStart) length:workingLength - epilogueStart freeWhenDone:NO];
                  [delegate processEpilogueData: epilogueData];
              }
              // These bytes are fully handled; leaving them pending would hand them to the
              // delegate again together with the next chunk.
              [pendingData setLength:0];
              return YES;
          }

          if( nil == currentHeader ) {
              // nil == currentHeader is a state flag: a part header is expected next.
              // Headers are small, so the parser waits until the whole header (terminated
              // by CRLFCRLF) is available instead of parsing it piecewise.
              int headerEnd = [self findHeaderEnd:workingData fromOffset:offset];
              if( -1 == headerEnd ) {
                  // didn't receive the full header yet; keep the unhandled bytes.
                  [self stashUnhandledBytesOf:workingData fromOffset:offset];
                  return YES;
              }

              // The header data includes the CRLF that ends its last line.
              NSData* headerData = [NSData dataWithBytesNoCopy:(void*) (bytes + offset) length:headerEnd + 2 - offset freeWhenDone:NO];
              currentHeader = [[MultipartMessageHeader alloc] initWithData:headerData formEncoding:formEncoding];
              if( nil == currentHeader ) {
                  // we've found the data is in wrong format.
                  HTTPLogError(@"MultipartFormDataParser: MultipartFormDataParser: wrong input format, coulnd't get a valid header");
                  return NO;
              }
              if( [delegate respondsToSelector:@selector(processStartOfPartWithHeader:)] ) {
                  [delegate processStartOfPartWithHeader:currentHeader];
              }
              HTTPLogVerbose(@"MultipartFormDataParser: MultipartFormDataParser: Retrieved part header.");

              // skip the two trailing \r\n, in addition to the whole header.
              offset = headerEnd + 4;
          }

          // Inside a part: look for the delimiter that ends it.
          int contentEnd = [self findContentEnd:workingData fromOffset:offset];
          if( -1 == contentEnd ) {
              // No delimiter yet, so the data belongs to the current part. The last
              // sizeToLeavePending bytes are held back because they may be the start of
              // a delimiter that continues in the next chunk.
              int sizeToPass = workingLength - offset - sizeToLeavePending;
              if( sizeToPass > 0 ) {
                  // For base64 / quoted-printable, hold back a few more bytes so that an
                  // encoded unit is not cut in two. The helper must look at the unhandled
                  // region of the working buffer, not at the raw incoming chunk.
                  NSData* unhandled = [NSData dataWithBytesNoCopy:(void*) (bytes + offset) length:workingLength - offset freeWhenDone:NO];
                  sizeToPass -= [self numberOfBytesToLeavePendingWithData:unhandled length:sizeToPass encoding:currentEncoding];
              }
              if( sizeToPass <= 0 ) {
                  // wait for more data!
                  [self stashUnhandledBytesOf:workingData fromOffset:offset];
                  return YES;
              }

              if( [delegate respondsToSelector:@selector(processContent:WithHeader:)] ) {
                  // decode the chunk and let the delegate use it (store in a file, for example)
                  NSData* rawChunk = [NSData dataWithBytesNoCopy:(void*) (bytes + offset) length:sizeToPass freeWhenDone:NO];
                  NSData* decodedData = [MultipartFormDataParser decodedDataFromData:rawChunk encoding:currentEncoding];
                  HTTPLogVerbose(@"MultipartFormDataParser: Processed %d bytes of body",sizeToPass);
                  [delegate processContent: decodedData WithHeader:currentHeader];
              }

              // Everything that was not delivered stays pending for the next call.
              [self stashUnhandledBytesOf:workingData fromOffset:offset + sizeToPass];
              return YES;
          }

          // The delimiter was found: deliver the rest of the part, decoded the same way
          // as the earlier chunks of it, and move on to the next part.
          if( [delegate respondsToSelector:@selector(processContent:WithHeader:)] ) {
              NSData* lastChunk = [NSData dataWithBytesNoCopy:(void*) (bytes + offset) length:contentEnd - offset freeWhenDone:NO];
              NSData* decodedLastChunk = [MultipartFormDataParser decodedDataFromData:lastChunk encoding:currentEncoding];
              [delegate processContent:decodedLastChunk WithHeader:currentHeader];
          }
          if( [delegate respondsToSelector:@selector(processEndOfPartWithHeader:)] ){
              [delegate processEndOfPartWithHeader:currentHeader];
              HTTPLogVerbose(@"MultipartFormDataParser: End of body part");
          }
          currentHeader = nil;

          // Continue right after the delimiter; the next iteration checks whether it was
          // the closing one and then skips the rest of its line.
          offset = contentEnd + sizeToLeavePending;
          checkForContentEnd = YES;
      }
  }
  245. //-----------------------------------------------------------------
  246. #pragma mark private methods
  /**
   * Finds the CRLF that ends a boundary line.
   *
   * @param offset  Position in `data` to start searching from.
   * @param data    Buffer to search.
   * @return The offset of the first byte after the CRLF, or -1 when no complete CRLF
   *         is present at or after `offset`.
   */
  - (int) offsetTillNewlineSinceOffset:(int) offset inData:(NSData*) data {
      const char* bytes = (const char*) data.bytes;
      int length = (int) data.length;

      // A CRLF takes two bytes, so the last position worth testing is length - 2.
      // Bytes are compared one by one: no read ever goes past the buffer, and the result
      // does not depend on byte order or alignment.
      for( ; offset + 1 < length; offset++ ) {
          if( bytes[offset] == '\r' && bytes[offset + 1] == '\n' ) {
              return offset + 2;
          }
  #ifdef DEBUG
          // rfc2046 allows any amount of whitespace between the boundary and the CRLF;
          // anything else suggests a malformed message.
          if( !isspace((unsigned char) bytes[offset]) ) {
              HTTPLogWarn(@"MultipartFormDataParser: Warning, non-whitespace character '%c' between boundary bytes and CRLF in boundary line",bytes[offset] );
          }
  #endif
      }

      // no endl found within current data
      return -1;
  }
  /**
   * Looks for the first boundary, which (unlike all later ones) has no CRLF in front of it.
   *
   * Bytes before the boundary are passed to the delegate through processPreambleData:
   * when the delegate implements it.
   *
   * @param data  The working buffer, searched from its beginning.
   * @return The offset just past the boundary when it was found (processedPreamble and
   *         waitingForCRLF are then set), or -1 when it is not in `data` yet. In the
   *         latter case the bytes that may still belong to a split boundary are stored
   *         in pendingData.
   */
  - (int) processPreamble:(NSData*) data {
      // the first boundary won't have CRLF preceding, so skip the stored "\r\n" prefix.
      const char* boundaryBytes = (const char*) boundaryData.bytes + 2;
      const char* dataBytes = (const char*) data.bytes;
      int boundaryLength = (int) boundaryData.length - 2;
      int dataLength = (int) data.length;

      // Last position at which a complete boundary still fits into the data.
      int lastCandidate = dataLength - boundaryLength;

      int offset = 0;
      BOOL boundaryFound = NO;
      for( ; offset <= lastCandidate; offset++ ) {
          int i = 0;
          while( i < boundaryLength && boundaryBytes[i] == dataBytes[offset + i] ) {
              i++;
          }
          if( i == boundaryLength ) {
              boundaryFound = YES;
              break;
          }
      }

      if( !boundaryFound ) {
          // The end of the preamble isn't in this chunk. The last boundaryLength bytes may
          // be the beginning of a split boundary, so only the bytes before them are preamble.
          int sizeToProcess = dataLength - boundaryLength;
          if( sizeToProcess > 0 ) {
              if( [delegate respondsToSelector:@selector(processPreambleData:)] ) {
                  NSData* preambleData = [NSData dataWithBytesNoCopy:(void*) dataBytes length:sizeToProcess freeWhenDone:NO];
                  [delegate processPreambleData:preambleData];
                  HTTPLogVerbose(@"MultipartFormDataParser: processed preamble");
              }
              pendingData = [NSMutableData dataWithBytes:dataBytes + sizeToProcess length:boundaryLength];
          }
          else if( data.length && data != pendingData ) {
              // Too short to tell anything yet: keep all of it for the next call.
              pendingData = [NSMutableData dataWithData:data];
          }
          return -1;
      }

      if( offset && [delegate respondsToSelector:@selector(processPreambleData:)] ) {
          NSData* preambleData = [NSData dataWithBytesNoCopy:(void*) dataBytes length:offset freeWhenDone:NO];
          [delegate processPreambleData:preambleData];
      }

      // tells the caller to skip the CRLF after the boundary.
      processedPreamble = YES;
      waitingForCRLF = YES;
      return offset + boundaryLength;
  }
  /**
   * Finds the CRLFCRLF sequence that separates a part header from the part content.
   *
   * @param workingData  Buffer to search.
   * @param offset       Position to start searching from.
   * @return The offset of the first byte of CRLFCRLF, or -1 when the sequence is not
   *         completely contained in `workingData` yet.
   */
  - (int) findHeaderEnd:(NSData*) workingData fromOffset:(int)offset {
      const char* bytes = (const char*) workingData.bytes;
      int inputLength = (int) workingData.length;

      // The terminator is four bytes long, so a candidate position needs four bytes of
      // data behind it; this keeps every read inside the buffer.
      for( ; offset + 4 <= inputLength; offset++ ) {
          if( bytes[offset]     == '\r' && bytes[offset + 1] == '\n' &&
              bytes[offset + 2] == '\r' && bytes[offset + 3] == '\n' ) {
              return offset;
          }
      }

      // wait for more data
      return -1;
  }
  /**
   * Searches `data` for the part delimiter stored in boundaryData.
   *
   * @param data    Buffer to search.
   * @param offset  Position to start searching from.
   * @return The offset at which the delimiter begins, or -1 when `data` holds no
   *         complete delimiter at or after `offset`.
   */
  - (int) findContentEnd:(NSData*) data fromOffset:(int) offset {
      char* needle = (char*) boundaryData.bytes;
      char* haystack = (char*) data.bytes;
      int needleLength = boundaryData.length;
      int haystackLength = data.length;

      // Last start position at which the whole delimiter still fits into the buffer.
      int lastCandidate = haystackLength - needleLength;

      for( int candidate = offset; candidate <= lastCandidate; candidate++ ) {
          // Count how many leading delimiter bytes match at this position.
          int matched = 0;
          while( matched < needleLength && needle[matched] == haystack[candidate + matched] ) {
              matched++;
          }
          if( matched == needleLength ) {
              return candidate;
          }
      }
      return -1;
  }
  /**
   * Tells how many of the first `length` bytes of `data` must be held back so that the
   * part which is passed on does not end in the middle of an encoded unit.
   *
   * @param data      Buffer holding the encoded content.
   * @param length    Number of leading bytes of `data` that are candidates for delivery.
   * @param encoding  One of the contentTransferEncoding_* values.
   * @return The number of trailing bytes to keep pending; 0 for encodings other than
   *         base64 and quoted-printable.
   */
  - (int) numberOfBytesToLeavePendingWithData:(NSData*) data length:(int) length encoding:(int) encoding {
      // Never look past the end of the buffer, whatever the caller asked for.
      if( length > (int) data.length ) {
          length = (int) data.length;
      }
      if( length <= 0 ) {
          return 0;
      }
      const char* bytes = (const char*) data.bytes;

      if( encoding == contentTransferEncoding_base64 ) {
          // A CR at the very end may be half of a line break; hold it back and do not
          // count it as a base64 character.
          int heldBackCR = ( bytes[length - 1] == '\r' ) ? 1 : 0;
          int usable = length - heldBackCR;

          // Find where the last (possibly incomplete) line begins: right after the last
          // CRLF, or at the start of the buffer when there is none.
          int lineStart = 0;
          for( int i = usable - 2; i >= 0; i-- ) {
              if( bytes[i] == '\r' && bytes[i + 1] == '\n' ) {
                  lineStart = i + 2;
                  break;
              }
          }

          // base64 is decoded in groups of four characters, so whatever exceeds a
          // multiple of four on the last line has to wait for the next chunk.
          return ( (usable - lineStart) % 4 ) + heldBackCR;
      }

      if( encoding == contentTransferEncoding_quotedPrintable ) {
          // Fewer than 3 bytes are never passed on.
          if( length <= 2 ) {
              return length;
          }
          // An '=' in one of the last two positions starts a three-byte sequence that is
          // not complete yet.
          if( bytes[length - 2] == '=' ) {
              return 2;
          }
          if( bytes[length - 1] == '=' ) {
              return 1;
          }
          return 0;
      }

      return 0;
  }
  380. //-----------------------------------------------------------------
  381. #pragma mark decoding
  /**
   * Undoes the content transfer encoding of a chunk of part content.
   *
   * @param data      The encoded bytes.
   * @param encoding  One of the contentTransferEncoding_* values.
   * @return The decoded bytes for base64 and quoted-printable; `data` itself for any
   *         other encoding.
   */
  + (NSData*) decodedDataFromData:(NSData*) data encoding:(int) encoding {
      if( encoding == contentTransferEncoding_base64 ) {
          return [data base64Decoded];
      }
      if( encoding == contentTransferEncoding_quotedPrintable ) {
          return [self decodedDataFromQuotedPrintableData:data];
      }
      // Every other encoding is passed through untouched.
      return data;
  }
  // Value of a hexadecimal digit (upper or lower case), or -1 when `c` is not one.
  static int MultipartHexDigitValue(unsigned char c) {
      if( c >= '0' && c <= '9' ) return c - '0';
      if( c >= 'A' && c <= 'F' ) return c - 'A' + 10;
      if( c >= 'a' && c <= 'f' ) return c - 'a' + 10;
      return -1;
  }

  /**
   * Decodes quoted-printable data (http://tools.ietf.org/html/rfc2045#section-6.7).
   *
   * "=XY" with two hex digits becomes the byte 0xXY, "=" followed by CRLF is a soft line
   * break and produces nothing, every other byte is copied as it is. An '=' that starts
   * neither of these (malformed or truncated input) is kept literally and a warning is
   * logged.
   *
   * @param data  The encoded bytes.
   * @return The decoded bytes.
   */
  + (NSData*) decodedDataFromQuotedPrintableData:(NSData *)data {
      const unsigned char* bytes = (const unsigned char*) data.bytes;
      NSUInteger length = data.length;

      // The decoded form is never longer than the encoded one. Capacity only reserves
      // memory; the result starts out empty.
      NSMutableData* result = [NSMutableData dataWithCapacity:length];

      NSUInteger literalStart = 0;    // first literal byte that has not been copied yet
      NSUInteger position = 0;
      while( position < length ) {
          if( bytes[position] != '=' ) {
  #ifdef DEBUG
              if( bytes[position] > 126 ) {
                  HTTPLogWarn(@"MultipartFormDataParser: Warning, character with code above 126 appears in quoted printable encoded data");
              }
  #endif
              position++;
              continue;
          }

          // Copy the run of literal bytes that precedes the '='.
          [result appendBytes:bytes + literalStart length:position - literalStart];

          NSUInteger available = length - position - 1;   // bytes following the '='
          int high = ( available >= 2 ) ? MultipartHexDigitValue(bytes[position + 1]) : -1;
          int low  = ( available >= 2 ) ? MultipartHexDigitValue(bytes[position + 2]) : -1;

          if( available >= 2 && bytes[position + 1] == '\r' && bytes[position + 2] == '\n' ) {
              // soft newline: contributes nothing to the output
              position += 3;
          }
          else if( high >= 0 && low >= 0 ) {
              unsigned char decodedByte = (unsigned char) ((high << 4) | low);
              [result appendBytes:&decodedByte length:1];
              position += 3;
          }
          else {
              // Not a valid escape. Keep the '=' as it is and go on with the next byte.
              HTTPLogWarn(@"MultipartFormDataParser: warning, trailing '=' in quoted printable data");
              [result appendBytes:bytes + position length:1];
              position += 1;
          }
          literalStart = position;
      }

      // Copy the literal bytes after the last escape (or the whole input if there was none).
      [result appendBytes:bytes + literalStart length:length - literalStart];
      return result;
  }
  440. @end