PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/DustMite/dsplit.d

https://github.com/s-ludwig/tools
D | 540 lines | 512 code | 22 blank | 6 comment | 24 complexity | 74b6ded6008e31bbf93f3947b560570d MD5 | raw file
  1. /// Very simplistic D source code "parser"
  2. /// Written by Vladimir Panteleev <vladimir@thecybershadow.net>
  3. /// Released into the Public Domain
  4. module dsplit;
  5. import std.file;
  6. import std.path;
  7. import std.string;
  8. import std.ascii;
  9. import std.array;
  10. debug import std.stdio;
  11. class Entity
  12. {
  13. string head;
  14. Entity[] children;
  15. string tail;
  16. string filename, contents;
  17. @property bool isFile() { return filename != ""; }
  18. bool isPair; /// internal hint
  19. bool noRemove; /// don't try removing this entity (children OK)
  20. bool removed; /// For dangling dependencies
  21. Entity[] dependencies;
  22. int id; /// For diagnostics
  23. size_t descendants; /// For progress display
  24. this(string head = null, Entity[] children = null, string tail = null, string filename = null, bool isPair = false)
  25. {
  26. this.head = head;
  27. this.children = children;
  28. this.tail = tail;
  29. this.filename = filename;
  30. this.isPair = isPair;
  31. }
  32. }
  33. struct ParseOptions
  34. {
  35. enum Mode { Source, Words }
  36. bool stripComments;
  37. Mode mode;
  38. }
  39. Entity loadFiles(ref string path, ParseOptions options)
  40. {
  41. if (isFile(path))
  42. {
  43. auto filePath = path;
  44. path = stripExtension(path);
  45. return loadFile(baseName(filePath).replace(`\`, `/`), filePath, options);
  46. }
  47. else
  48. {
  49. auto set = new Entity();
  50. foreach (string entry; dirEntries(path, SpanMode.breadth))
  51. if (isFile(entry))
  52. {
  53. assert(entry.startsWith(path));
  54. auto name = entry[path.length+1..$];
  55. set.children ~= loadFile(name, entry, options);
  56. }
  57. return set;
  58. }
  59. }
  60. enum BIN_SIZE = 2;
  61. void optimize(Entity set)
  62. {
  63. static void group(ref Entity[] set, size_t start, size_t end)
  64. {
  65. //set = set[0..start] ~ [new Entity(removable, set[start..end])] ~ set[end..$];
  66. set.replaceInPlace(start, end, [new Entity(null, set[start..end].dup, null)]);
  67. }
  68. static void clusterBy(ref Entity[] set, size_t binSize)
  69. {
  70. while (set.length > binSize)
  71. {
  72. auto size = set.length >= binSize*2 ? binSize : (set.length+1) / 2;
  73. //auto size = binSize;
  74. auto bins = set.length/size;
  75. if (set.length % size > 1)
  76. group(set, bins*size, set.length);
  77. foreach_reverse (i; 0..bins)
  78. group(set, i*size, (i+1)*size);
  79. }
  80. }
  81. static void doOptimize(Entity e)
  82. {
  83. foreach (c; e.children)
  84. doOptimize(c);
  85. clusterBy(e.children, BIN_SIZE);
  86. }
  87. doOptimize(set);
  88. }
  89. private:
  90. Entity loadFile(string name, string path, ParseOptions options)
  91. {
  92. debug writeln("Loading ", path);
  93. auto result = new Entity();
  94. result.filename = name.replace(`\`, `/`);
  95. result.contents = cast(string)read(path);
  96. if (options.stripComments)
  97. if (extension(path) == ".d" || extension(path) == ".di")
  98. result.contents = stripDComments(result.contents);
  99. final switch (options.mode)
  100. {
  101. case ParseOptions.Mode.Source:
  102. switch (extension(path))
  103. {
  104. case ".d":
  105. case ".di":
  106. result.children = parseD(result.contents); return result;
  107. // One could add custom splitters for other languages here - for example, a simple line/word/character splitter for most text-based formats
  108. default:
  109. result.children = [new Entity(result.contents, null, null)]; return result;
  110. }
  111. case ParseOptions.Mode.Words:
  112. result.children = parseToWords(result.contents); return result;
  113. }
  114. }
  115. string skipSymbol(string s, ref size_t i)
  116. {
  117. auto start = i;
  118. switch (s[i])
  119. {
  120. case '\'':
  121. i++;
  122. if (s[i] == '\\')
  123. i+=2;
  124. while (s[i] != '\'')
  125. i++;
  126. i++;
  127. break;
  128. case '\\':
  129. i+=2;
  130. break;
  131. case '"':
  132. if (i && s[i-1] == 'r')
  133. {
  134. i++;
  135. while (s[i] != '"')
  136. i++;
  137. i++;
  138. }
  139. else
  140. {
  141. i++;
  142. while (s[i] != '"')
  143. {
  144. if (s[i] == '\\')
  145. i+=2;
  146. else
  147. i++;
  148. }
  149. i++;
  150. }
  151. break;
  152. case '`':
  153. i++;
  154. while (s[i] != '`')
  155. i++;
  156. i++;
  157. break;
  158. case '/':
  159. i++;
  160. if (i==s.length)
  161. break;
  162. else
  163. if (s[i] == '/')
  164. {
  165. while (i < s.length && s[i] != '\r' && s[i] != '\n')
  166. i++;
  167. }
  168. else
  169. if (s[i] == '*')
  170. {
  171. i+=3;
  172. while (s[i-2] != '*' || s[i-1] != '/')
  173. i++;
  174. }
  175. else
  176. if (s[i] == '+')
  177. {
  178. i++;
  179. int commentLevel = 1;
  180. while (commentLevel)
  181. {
  182. if (s[i] == '/' && s[i+1]=='+')
  183. commentLevel++, i+=2;
  184. else
  185. if (s[i] == '+' && s[i+1]=='/')
  186. commentLevel--, i+=2;
  187. else
  188. i++;
  189. }
  190. }
  191. else
  192. i++;
  193. break;
  194. default:
  195. i++;
  196. break;
  197. }
  198. return s[start..i];
  199. }
  200. /// Moves i forward over first series of EOL characters, or until first non-whitespace character
  201. void skipToEOL(string s, ref size_t i)
  202. {
  203. while (i < s.length)
  204. {
  205. if (s[i] == '\r' || s[i] == '\n')
  206. {
  207. while (i < s.length && (s[i] == '\r' || s[i] == '\n'))
  208. i++;
  209. return;
  210. }
  211. else
  212. if (isWhite(s[i]))
  213. i++;
  214. else
  215. if (s[i..$].startsWith("//"))
  216. skipSymbol(s, i);
  217. else
  218. break;
  219. }
  220. }
  221. /// Moves i backwards to the beginning of the current line, but not any further than start
  222. void backToEOL(string s, ref size_t i, size_t start)
  223. {
  224. while (i>start && isWhite(s[i-1]) && s[i-1] != '\n')
  225. i--;
  226. }
  227. Entity[] parseD(string s)
  228. {
  229. size_t i = 0;
  230. size_t start;
  231. string innerTail;
  232. Entity[] parseScope(char end)
  233. {
  234. // Here be dragons.
  235. enum MAX_SPLITTER_LEVELS = 5;
  236. struct DSplitter { char open, close, sep; }
  237. static const DSplitter[MAX_SPLITTER_LEVELS] splitters = [{'{','}',';'}, {'(',')'}, {'[',']'}, {sep:','}, {sep:' '}];
  238. Entity[][MAX_SPLITTER_LEVELS] splitterQueue;
  239. Entity[] terminateLevel(int level)
  240. {
  241. if (level == MAX_SPLITTER_LEVELS)
  242. {
  243. auto text = s[start..i];
  244. start = i;
  245. return splitText(text);
  246. }
  247. else
  248. {
  249. auto next = terminateLevel(level+1);
  250. if (next.length <= 1)
  251. splitterQueue[level] ~= next;
  252. else
  253. splitterQueue[level] ~= new Entity(null, next, null);
  254. auto r = splitterQueue[level];
  255. splitterQueue[level] = null;
  256. return r;
  257. }
  258. }
  259. string terminateText()
  260. {
  261. auto r = s[start..i];
  262. start = i;
  263. return r;
  264. }
  265. characterLoop:
  266. while (i < s.length)
  267. {
  268. char c = s[i];
  269. foreach (int level, info; splitters)
  270. if (info.sep && c == info.sep)
  271. {
  272. auto children = terminateLevel(level+1);
  273. assert(i == start);
  274. i++; skipToEOL(s, i);
  275. splitterQueue[level] ~= new Entity(null, children, terminateText());
  276. continue characterLoop;
  277. }
  278. else
  279. if (info.open && c == info.open)
  280. {
  281. auto openPos = i;
  282. backToEOL(s, i, start);
  283. auto pairHead = terminateLevel(level+1);
  284. i = openPos+1; skipToEOL(s, i);
  285. auto startSequence = terminateText();
  286. auto bodyContents = parseScope(info.close);
  287. auto pairBody = new Entity(startSequence, bodyContents, innerTail);
  288. if (pairHead.length == 0)
  289. splitterQueue[level] ~= pairBody;
  290. else
  291. if (pairHead.length == 1)
  292. splitterQueue[level] ~= new Entity(null, pairHead ~ pairBody, null, null, true);
  293. else
  294. splitterQueue[level] ~= new Entity(null, [new Entity(null, pairHead, null), pairBody], null, null, true);
  295. continue characterLoop;
  296. }
  297. if (end && c == end)
  298. {
  299. auto closePos = i;
  300. backToEOL(s, i, start);
  301. auto result = terminateLevel(0);
  302. i = closePos+1; skipToEOL(s, i);
  303. innerTail = terminateText();
  304. return result;
  305. }
  306. else
  307. skipSymbol(s, i);
  308. }
  309. innerTail = null;
  310. return terminateLevel(0);
  311. }
  312. auto result = parseScope(0);
  313. postProcessD(result);
  314. return result;
  315. }
  316. string stripDComments(string s)
  317. {
  318. auto result = appender!string();
  319. size_t i = 0;
  320. while (i < s.length)
  321. {
  322. auto sym = skipSymbol(s, i);
  323. if (!sym.startsWithComment())
  324. result.put(sym);
  325. }
  326. return result.data;
  327. }
  328. void postProcessD(ref Entity[] entities)
  329. {
  330. for (int i=0; i<entities.length;)
  331. {
  332. // Process comma-separated lists. Nest later items and add a dependency for the comma.
  333. if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == ",")
  334. {
  335. // Put the comma in its own entity, so it can have a dependency
  336. auto comma = new Entity(entities[i].tail);
  337. entities[i].tail = null;
  338. // Create a separate group for the item and its following comma, so that they could be removed together
  339. auto commaGroup = new Entity(null, [entities[i].children[$-1], comma], null);
  340. entities[i].children[$-1] = commaGroup;
  341. // Place all the remaining items from the current entity into their own new group,
  342. // so that they could be removed together and the comma could have a dependency on all the remaining items
  343. auto rest = new Entity(null, entities[i+1..$], null);
  344. entities[i].children ~= rest;
  345. entities = entities[0..i+1];
  346. // Register the dependency
  347. comma.dependencies ~= rest;
  348. }
  349. // Group together consecutive entities which might represent a single language construct
  350. // There is no penalty for false positives, so accuracy is not very important
  351. if (i+2 <= entities.length && entities.length > 2 && (
  352. (getHeadText(entities[i]).startsWithWord("do") && getHeadText(entities[i+1]).isWord("while"))
  353. || (getHeadText(entities[i]).startsWithWord("try") && getHeadText(entities[i+1]).startsWithWord("catch"))
  354. || (getHeadText(entities[i]).startsWithWord("try") && getHeadText(entities[i+1]).startsWithWord("finally"))
  355. || (getHeadText(entities[i+1]).isWord("in"))
  356. || (getHeadText(entities[i+1]).isWord("out"))
  357. || (getHeadText(entities[i+1]).isWord("body"))
  358. ))
  359. {
  360. entities.replaceInPlace(i, i+2, [new Entity(null, entities[i..i+2].dup, null)]);
  361. continue;
  362. }
  363. postProcessD(entities[i].children);
  364. i++;
  365. }
  366. }
  367. const bool[string] wordsToSplit;
  368. static this() { wordsToSplit = ["else":true]; }
  369. Entity[] splitText(string s)
  370. {
  371. Entity[] result;
  372. while (s.length)
  373. {
  374. auto word = firstWord(s);
  375. if (word in wordsToSplit)
  376. {
  377. size_t p = word.ptr + word.length - s.ptr;
  378. skipToEOL(s, p);
  379. result ~= new Entity(s[0..p], null, null);
  380. s = s[p..$];
  381. }
  382. else
  383. {
  384. result ~= new Entity(s, null, null);
  385. s = null;
  386. }
  387. }
  388. return result;
  389. }
  390. string stripD(string s)
  391. {
  392. size_t i=0;
  393. size_t start=s.length, end=s.length;
  394. while (i < s.length)
  395. {
  396. if (s[i..$].startsWithComment())
  397. skipSymbol(s, i);
  398. else
  399. if (!isWhite(s[i]))
  400. {
  401. if (start > i)
  402. start = i;
  403. skipSymbol(s, i);
  404. end = i;
  405. }
  406. else
  407. i++;
  408. }
  409. return s[start..end];
  410. }
  411. string firstWord(string s)
  412. {
  413. size_t i = 0;
  414. s = stripD(s);
  415. while (i<s.length && !isWhite(s[i]))
  416. i++;
  417. return s[0..i];
  418. }
  419. bool startsWithWord(string s, string word)
  420. {
  421. s = stripD(s);
  422. return s.startsWith(word) && (s.length == word.length || !isAlphaNum(s[word.length]));
  423. }
  424. bool endsWithWord(string s, string word)
  425. {
  426. s = stripD(s);
  427. return s.endsWith(word) && (s.length == word.length || !isAlphaNum(s[$-word.length-1]));
  428. }
  429. bool isWord(string s, string word)
  430. {
  431. return stripD(s) == word;
  432. }
  433. bool startsWithComment(string s)
  434. {
  435. return s.startsWith("//") || s.startsWith("/*") || s.startsWith("/+");
  436. }
  437. Entity getHeadEntity(Entity e)
  438. {
  439. if (e.head.length)
  440. return e;
  441. foreach (child; e.children)
  442. {
  443. Entity r = getHeadEntity(child);
  444. if (r)
  445. return r;
  446. }
  447. if (e.tail.length)
  448. return e;
  449. return null;
  450. }
  451. string getHeadText(Entity e)
  452. {
  453. e = getHeadEntity(e);
  454. if (!e)
  455. return null;
  456. if (e.head)
  457. return e.head;
  458. return e.tail;
  459. }
  460. // ParseOptions.Mode.Words
  461. bool isDWordChar(char c)
  462. {
  463. return isAlphaNum(c) || c=='_' || c=='@';
  464. }
  465. public Entity[] parseToWords(string text)
  466. {
  467. Entity[] result;
  468. size_t i, wordStart, wordEnd;
  469. for (i = 1; i <= text.length; i++)
  470. if (i==text.length || (!isDWordChar(text[i-1]) && isDWordChar(text[i])))
  471. {
  472. if (wordStart != i)
  473. result ~= new Entity(text[wordStart..wordEnd], null, text[wordEnd..i]);
  474. wordStart = wordEnd = i;
  475. }
  476. else
  477. if ((isDWordChar(text[i-1]) && !isDWordChar(text[i])))
  478. wordEnd = i;
  479. return result;
  480. }