PageRenderTime 50ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/tools/license2rtf.js

https://gitlab.com/CORP-RESELLER/node
JavaScript | 332 lines | 306 code | 18 blank | 8 comment | 7 complexity | 2cee82d57a572688bc7722ae2ce3a454 MD5 | raw file
  1. 'use strict';
  2. const assert = require('assert');
  3. const Stream = require('stream');
  4. const inherits = require('util').inherits;
  /*
   * This filter consumes a stream of characters and emits one string per line.
   *
   * Recognised line terminators are "\r\n", "\n\r", "\n" and "\r". Every
   * complete line is emitted as a 'data' event (without its terminator); a
   * final unterminated line is emitted by end() if it is non-empty, followed
   * by an 'end' event.
   */
  function LineSplitter() {
    const self = this;
    // Two-character terminators are listed first so that they are consumed as
    // a single line break.
    const line_break = /\r\n|\n\r|\n|\r/;
    // Input received so far that has not been emitted yet.
    let buffer = '';

    Stream.call(this);
    this.writable = true;

    // Splits `text` on line terminators, emits every terminated line and
    // returns the unterminated remainder (possibly the empty string).
    function emitCompleteLines(text) {
      const lines = text.split(line_break);
      const remainder = lines.pop();
      for (const line of lines) {
        self.emit('data', line);
      }
      return remainder;
    }

    // Accepts a chunk of text (coerced to a string) and emits the lines that
    // are known to be complete. Always returns true.
    this.write = function(data) {
      const text = buffer + data;

      // A trailing run of CR/LF characters may be the first half of a
      // two-character terminator whose second half arrives with the next
      // chunk. Splitting it now would turn e.g. "a\r" + "\nb" into three lines
      // instead of two, so the run (and the line it terminates) stays in the
      // buffer until more input or end() arrives.
      let cut = text.length;
      while (cut > 0 && (text[cut - 1] === '\r' || text[cut - 1] === '\n')) {
        cut--;
      }

      buffer = emitCompleteLines(text.slice(0, cut)) + text.slice(cut);
      return true;
    };

    // Processes an optional last chunk, flushes everything that is still
    // buffered and emits 'end'.
    this.end = function(data) {
      const remainder = emitCompleteLines(buffer + (data || ''));
      buffer = '';
      if (remainder) {
        self.emit('data', remainder);
      }
      self.emit('end');
    };
  }
  inherits(LineSplitter, Stream);
  /*
   * This filter consumes lines and emits paragraph objects.
   *
   * Input: one line of text per write() call (the value is coerced to a
   * string). Output: 'data' events carrying objects of the form
   *
   *   {
   *     li:               list marker ('1.', '*', '-'), falsy when absent
   *     in_license_block: true when the paragraph is inside a license block
   *     lines:            content lines with indentation/markers stripped
   *     level:            leading-whitespace width of the first line, halved
   *   }
   *
   * A line made of three or more '=' or '"' characters toggles between
   * license blocks and ordinary text. Blank lines, separator lines, list
   * markers and indentation jumps all start a new paragraph.
   */
  function ParagraphParser() {
    const self = this;
    let block_is_license_block = false;
    let block_has_c_style_comment;
    // Fudged indentation of the current paragraph, or -1 while still unknown.
    let paragraph_line_indent;
    let paragraph;

    Stream.call(this);
    this.writable = true;

    resetBlock(false);

    this.write = function(data) {
      parseLine(data + '');
      return true;
    };

    this.end = function(data) {
      if (data) {
        parseLine(data + '');
      }
      flushParagraph();
      self.emit('end');
    };

    // Starts a fresh, empty paragraph inside the current block.
    function resetParagraph() {
      paragraph_line_indent = -1;
      paragraph = {
        li: '',
        in_license_block: block_is_license_block,
        lines: []
      };
    }

    // Starts a new block (license or ordinary text) with an empty paragraph.
    function resetBlock(is_license_block) {
      block_is_license_block = is_license_block;
      block_has_c_style_comment = false;
      resetParagraph();
    }

    // Emits the current paragraph if it has any content, then starts a new one.
    function flushParagraph() {
      if (paragraph.lines.length || paragraph.li) {
        self.emit('data', paragraph);
      }
      resetParagraph();
    }

    // Classifies a single input line and updates the parser state.
    function parseLine(line) {
      // Strip trailing whitespace
      line = line.replace(/\s*$/, '');

      // Detect block separator
      if (/^\s*(=|"){3,}\s*$/.test(line)) {
        flushParagraph();
        resetBlock(!block_is_license_block);
        return;
      }

      // Strip comments around block
      if (block_is_license_block) {
        if (!block_has_c_style_comment) {
          block_has_c_style_comment = /^\s*(\/\*)/.test(line);
        }
        if (block_has_c_style_comment) {
          const prev = line;
          line = line.replace(/^(\s*?)(?:\s?\*\/|\/\*\s|\s\*\s?)/, '$1');
          // Lines without a comment decoration are still indented to line up
          // with the decorated ones; drop that indentation.
          if (prev === line) {
            line = line.replace(/^\s{2}/, '');
          }
          // The comment ends on the line that contains the closing marker.
          if (/\*\//.test(prev)) {
            block_has_c_style_comment = false;
          }
        } else {
          // Strip C++ and perl style comments.
          line = line.replace(/^(\s*)(?:\/\/\s?|#\s?)/, '$1');
        }
      }

      // Detect blank line (paragraph separator)
      if (!/\S/.test(line)) {
        flushParagraph();
        return;
      }

      // Detect separator "lines" within a block. These mark a paragraph break
      // and are stripped from the output.
      if (/^\s*[=*\-]{5,}\s*$/.test(line)) {
        flushParagraph();
        return;
      }

      // Find out indentation level and the start of a bulleted or numbered
      // list.
      const result = /^(\s*)(\d+\.|\*|-)?\s*/.exec(line);
      assert.ok(result);

      // The number of characters that will be stripped from the beginning of
      // the line.
      const line_strip_length = result[0].length;
      // The indentation size that will be used to detect indentation jumps.
      // Fudge by 1 space.
      const line_indent = Math.floor(result[0].length / 2) * 2;
      // The indentation level that will be exported
      const level = Math.floor(result[1].length / 2);
      // The list indicator that precedes the actual content, if any.
      const line_li = result[2];

      // Flush the paragraph when there is a li or an indentation jump
      if (line_li || (line_indent !== paragraph_line_indent &&
                      paragraph_line_indent !== -1)) {
        flushParagraph();
        paragraph.li = line_li;
      }

      // Set the paragraph indent that we use to detect indentation jumps. When
      // we just detected a list indicator, wait for the next line to arrive
      // before setting this. The indent is recorded only while it is still
      // unknown (-1); testing for `!== -1` here would leave it at -1 forever
      // and silently disable the indentation-jump check above.
      if (!line_li && paragraph_line_indent === -1) {
        paragraph_line_indent = line_indent;
      }

      // Set the output indent level if it has not been set yet.
      if (paragraph.level === undefined) {
        paragraph.level = level;
      }

      // Strip leading whitespace and li.
      line = line.slice(line_strip_length);

      if (line) {
        paragraph.lines.push(line);
      }
    }
  }
  inherits(ParagraphParser, Stream);
  /*
   * This filter consumes paragraph objects and emits modified paragraph objects.
   * The lines within the paragraph are unwrapped where appropriate. It also
   * replaces multiple consecutive whitespace characters by a single one.
   *
   * The `lines` array of each paragraph is rewritten in place and the same
   * paragraph object is emitted as a 'data' event.
   */
  function Unwrapper() {
    const self = this;

    Stream.call(this);
    this.writable = true;

    this.write = function(paragraph) {
      const lines = paragraph.lines;
      const unwrapped = [];
      // Whether the line being visited starts a new output line. The first
      // line always does.
      let starts_new_line = true;

      // Single pass over the ORIGINAL lines: the break decision for line i is
      // taken from line i and line i + 1 as they were received, and applied
      // right away. (Deciding first and merging afterwards by index goes wrong
      // as soon as a merge shifts the positions of the remaining lines.)
      for (let i = 0; i < lines.length; i++) {
        const line = lines[i];

        if (starts_new_line) {
          unwrapped.push(line);
        } else {
          unwrapped[unwrapped.length - 1] += ' ' + line;
        }

        starts_new_line = false;
        // When a line is really short, the line was probably kept separate for
        // a reason.
        if (i + 1 < lines.length && line.length < 50) {
          // If the first word on the next line really didn't fit after the
          // line, it probably was just ordinary wrapping after all.
          const next_first_word_length =
            lines[i + 1].replace(/\s.*$/, '').length;
          starts_new_line = line.length + next_first_word_length < 60;
        }
      }

      // Write the result back into the caller's array. Replace multiple
      // whitespace characters by a single one, and strip trailing whitespace.
      lines.length = 0;
      for (const line of unwrapped) {
        lines.push(line.replace(/\s+/g, ' ').replace(/\s+$/, ''));
      }

      self.emit('data', paragraph);
      return true;
    };

    this.end = function(data) {
      if (data) {
        self.write(data);
      }
      self.emit('end');
    };
  }
  inherits(Unwrapper, Stream);
  /*
   * This filter generates an rtf document from a stream of paragraph objects.
   *
   * Each written paragraph is read through its `li`, `level`,
   * `in_license_block` and `lines` properties and produces one 'data' event
   * containing an RTF paragraph. The document header is emitted before the
   * first paragraph and the closing brace on end(); if nothing was written,
   * neither is emitted.
   */
  function RtfGenerator() {
    const self = this;
    let did_write_anything = false;

    Stream.call(this);
    this.writable = true;

    this.write = function(paragraph) {
      if (!did_write_anything) {
        emitHeader();
        did_write_anything = true;
      }

      const li = paragraph.li;
      // List items are indented one level deeper than their marker.
      const level = paragraph.level + (li ? 1 : 0);
      const lic = paragraph.in_license_block;

      let rtf = '\\pard';
      rtf += '\\sa150\\sl300\\slmult1';
      if (level > 0) {
        rtf += '\\li' + (level * 240);
      }
      if (li) {
        rtf += '\\tx' + (level) * 240;
        rtf += '\\fi-240';
      }
      if (lic) {
        rtf += '\\ri240';
      }
      // Text outside license blocks is rendered bold.
      if (!lic) {
        rtf += '\\b';
      }
      if (li) {
        rtf += ' ' + li + '\\tab';
      }
      rtf += ' ';
      rtf += paragraph.lines.map(rtfEscape).join('\\line ');
      if (!lic) {
        rtf += '\\b0';
      }
      rtf += '\\par\n';

      self.emit('data', rtf);
      return true;
    };

    this.end = function(data) {
      if (data) {
        self.write(data);
      }
      if (did_write_anything) {
        emitFooter();
      }
      self.emit('end');
    };

    // Returns `number` as lowercase hexadecimal, left-padded with zeros to at
    // least `length` digits.
    function toHex(number, length) {
      let hex = (~~number).toString(16);
      while (hex.length < length) {
        hex = '0' + hex;
      }
      return hex;
    }

    // Escapes a line of text for inclusion in the RTF output. The order of the
    // replacements matters: backslashes and braces are escaped before any
    // control word is inserted, and tabs are handled before the generic
    // control-character rule.
    function rtfEscape(string) {
      return string
        .replace(/[\\\{\}]/g, function(m) {
          return '\\' + m;
        })
        .replace(/\t/g, function() {
          return '\\tab ';
        })
        // eslint-disable-next-line no-control-regex
        .replace(/[\x00-\x1f\x7f-\xff]/g, function(m) {
          return '\\\'' + toHex(m.charCodeAt(0), 2);
        })
        .replace(/\ufeff/g, '')
        .replace(/[\u0100-\uffff]/g, function(m) {
          // The argument of \u is a signed 16-bit DECIMAL number, so code
          // units above 32767 are written as negative values. The trailing
          // '?' is the single fallback character announced by \uc1 in the
          // header.
          const code = m.charCodeAt(0);
          const signed = code > 0x7fff ? code - 0x10000 : code;
          return '\\u' + signed + '?';
        });
    }

    function emitHeader() {
      self.emit('data', '{\\rtf1\\ansi\\ansicpg1252\\uc1\\deff0\\deflang1033' +
                        '{\\fonttbl{\\f0\\fswiss\\fcharset0 Tahoma;}}\\fs20\n' +
                        '{\\*\\generator txt2rtf 0.0.1;}\n');
    }

    function emitFooter() {
      self.emit('data', '}');
    }
  }
  inherits(RtfGenerator, Stream);
  // Assemble the conversion pipeline:
  //   stdin -> lines -> paragraphs -> unwrapped paragraphs -> rtf -> stdout
  const stdin = process.stdin;
  const stdout = process.stdout;
  const line_splitter = new LineSplitter();
  const paragraph_parser = new ParagraphParser();
  const unwrapper = new Unwrapper();
  const rtf_generator = new RtfGenerator();

  // Read the input as text and start it flowing.
  stdin.setEncoding('utf-8');
  stdin.resume();

  // pipe() returns its destination, so the stages can be chained in order.
  stdin
    .pipe(line_splitter)
    .pipe(paragraph_parser)
    .pipe(unwrapper)
    .pipe(rtf_generator)
    .pipe(stdout);