PageRenderTime 257ms CodeModel.GetById 32ms RepoModel.GetById 1ms app.codeStats 0ms

/src/Parser.cpp

http://github.com/evilsocket/SoftWire
C++ | 564 lines | 468 code | 68 blank | 28 comment | 113 complexity | ffc928965e031232f32e8d79a5085d47 MD5 | raw file
Possible License(s): GPL-3.0
  1. /***************************************************************************
  2. * SoftWire Runtime x86 Assembler Library *
  3. * Project originally by Nicolas Capens, *
  4. * new implementation by Simone Margaritelli <evilsocket@gmail.com> *
  5. * *
  6. * This program is free software; you can redistribute it and/or modify *
  7. * it under the terms of the GNU General Public License as published by *
  8. * the Free Software Foundation; either version 2 of the License, or *
  9. * (at your option) any later version. *
  10. * *
  11. * This program is distributed in the hope that it will be useful, *
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  14. * GNU General Public License for more details. *
  15. * *
  16. * You should have received a copy of the GNU General Public License *
  17. * along with this program; if not, write to the *
  18. * Free Software Foundation, Inc., *
  19. * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
  20. ***************************************************************************/
  #include "Parser.hpp"
  #include "TokenList.hpp"
  #include "Token.hpp"
  #include "Error.hpp"
  #include "String.hpp"
  #include "Synthesizer.hpp"
  #include <string.h>
  27. namespace SoftWire
  28. {
  29. #error Important: In newer versions of SoftWire the built-in file assembler might be removed.
  30. #error Classes like the Parser, Scanner, Token would be deprecated. The reason for this is
  31. #error that it is in the way of new development, and has no added value for me any more.
  32. #error Run-time intrinsics are far more powerful, while the file assembler hasn't changed a
  33. #error single bit for a whole year. The file assembler would still be available in older
  34. #error versions of course. If you are using the file assembler and/or would like to keep it in
  35. #error newer versions, for any reason, please let me know! E-mail: evilsocket@gmail.com
  36. #error Remove these lines to continue compilation.
  37. Parser::Parser(TokenList &tokenList, Synthesizer &synthesizer, const InstructionSet &instructionSet) : tokenList(tokenList), synthesizer(synthesizer), instructionSet(instructionSet)
  38. {
  39. }
  40. Parser::~Parser()
  41. {
  42. }
  43. const Encoding &Parser::parseLine()
  44. {
  45. instruction = 0;
  46. synthesizer.reset();
  47. if(!tokenList.isEndOfLine())
  48. {
  49. parseLabel();
  50. }
  51. if(!tokenList.isEndOfLine())
  52. {
  53. parseMnemonic();
  54. parseFirstOperand();
  55. parseSecondOperand();
  56. parseThirdOperand();
  57. }
  58. int shortestSize = 16;
  59. Instruction *bestMatch = 0;
  60. if(instruction)
  61. {
  62. do
  63. {
  64. if(instruction->matchSyntax())
  65. {
  66. const int size = instruction->approximateSize();
  67. if(size < shortestSize)
  68. {
  69. bestMatch = instruction;
  70. shortestSize = size;
  71. }
  72. }
  73. instruction = instruction->getNext();
  74. }
  75. while(instruction);
  76. if(!bestMatch)
  77. {
  78. throw Error("Operands mismatch");
  79. }
  80. }
  81. return synthesizer.encodeInstruction(bestMatch);
  82. }
  83. const char *Parser::skipLine() const
  84. {
  85. while(!tokenList.isEndOfLine())
  86. {
  87. tokenList.advance();
  88. }
  89. const char *currentLine = tokenList.getString();
  90. if(!tokenList.isEndOfFile())
  91. {
  92. tokenList.advance();
  93. }
  94. return currentLine;
  95. }
  96. void Parser::parseLabel()
  97. {
  98. if(tokenList.isIdentifier() && tokenList.lookAhead().isPunctuator(':'))
  99. {
  100. synthesizer.defineLabel(tokenList.getString());
  101. tokenList.advance(2);
  102. }
  103. }
  104. void Parser::parseMnemonic()
  105. {
  106. char mnemonic[256] = {0};
  107. strcpy(mnemonic, tokenList.getString());
  108. tokenList.advance();
  109. if(stricmp(mnemonic, "LOCK") == 0)
  110. {
  111. strcat(mnemonic, " ");
  112. strcat(mnemonic, tokenList.getString());
  113. tokenList.advance();
  114. }
  115. instruction = instructionSet.query(mnemonic);
  116. if(!instruction)
  117. {
  118. throw Error("Unrecognized mnemonic '%s'", mnemonic);
  119. }
  120. }
  121. void Parser::parseSpecifier()
  122. {
  123. Specifier::Type type = Specifier::TYPE_UNKNOWN;
  124. if(tokenList.isIdentifier())
  125. {
  126. type = Specifier::scan(tokenList.getString());
  127. }
  128. instruction->matchSpecifier(type);
  129. if(type != Specifier::TYPE_UNKNOWN)
  130. {
  131. tokenList.advance();
  132. type = Specifier::scan(tokenList.getString());
  133. if(type == Specifier::TYPE_PTR)
  134. {
  135. tokenList.advance();
  136. }
  137. }
  138. }
  139. void Parser::parseFirstOperand()
  140. {
  141. if(!instruction)
  142. {
  143. throw INTERNAL_ERROR;
  144. }
  145. parseSpecifier();
  146. Operand firstOperand;
  147. if(tokenList.isEndOfLine())
  148. {
  149. }
  150. else if(tokenList.isPunctuator())
  151. {
  152. switch(tokenList.getChar())
  153. {
  154. case '[':
  155. firstOperand = parseMemoryReference();
  156. break;
  157. case '+':
  158. case '-':
  159. case '~':
  160. firstOperand = parseImmediate();
  161. break;
  162. default:
  163. throw Error("Unexpected punctuator '%c' after mnemonic", tokenList.getChar());
  164. }
  165. }
  166. else if(tokenList.isInteger() || tokenList.isReal())
  167. {
  168. firstOperand = parseImmediate();
  169. }
  170. else if(tokenList.isIdentifier())
  171. {
  172. firstOperand = parseRegister();
  173. }
  174. else if(tokenList.isLiteral())
  175. {
  176. firstOperand = parseLiteral();
  177. }
  178. else
  179. {
  180. throw Error("Invalid destination operand");
  181. }
  182. instruction->matchFirstOperand(firstOperand);
  183. synthesizer.encodeFirstOperand(firstOperand);
  184. }
  185. void Parser::parseSecondOperand()
  186. {
  187. if(!instruction)
  188. {
  189. throw INTERNAL_ERROR;
  190. }
  191. if(tokenList.isPunctuator(','))
  192. {
  193. tokenList.advance();
  194. }
  195. else if(!tokenList.isEndOfLine())
  196. {
  197. throw Error("Operands must be separated by comma, found '%c'", tokenList.getChar());
  198. }
  199. else
  200. {
  201. instruction->matchSecondOperand(Operand::OPERAND_VOID);
  202. return;
  203. }
  204. parseSpecifier();
  205. Operand secondOperand;
  206. if(tokenList.isEndOfLine())
  207. {
  208. }
  209. else if(tokenList.isPunctuator())
  210. {
  211. switch(tokenList.getChar())
  212. {
  213. case '[':
  214. secondOperand = parseMemoryReference();
  215. break;
  216. case '+':
  217. case '-':
  218. case '~':
  219. secondOperand = parseImmediate();
  220. break;
  221. default:
  222. throw Error("Unexpected punctuator after mnemonic '%c'", tokenList.getChar());
  223. }
  224. }
  225. else if(tokenList.isInteger())
  226. {
  227. secondOperand = parseImmediate();
  228. }
  229. else if(tokenList.isIdentifier())
  230. {
  231. secondOperand = parseRegister();
  232. }
  233. else
  234. {
  235. throw Error("Invalid source operand");
  236. }
  237. instruction->matchSecondOperand(secondOperand);
  238. synthesizer.encodeSecondOperand(secondOperand);
  239. }
  240. void Parser::parseThirdOperand()
  241. {
  242. if(!instruction)
  243. {
  244. throw INTERNAL_ERROR;
  245. }
  246. if(tokenList.isPunctuator(','))
  247. {
  248. tokenList.advance();
  249. }
  250. else if(!tokenList.isEndOfLine())
  251. {
  252. throw Error("Operands must be separated by comma, found '%c'", tokenList.getChar());
  253. }
  254. else
  255. {
  256. instruction->matchThirdOperand(Operand::OPERAND_VOID);
  257. return;
  258. }
  259. Operand thirdOperand;
  260. if(tokenList.isEndOfLine())
  261. {
  262. }
  263. else if(tokenList.isPunctuator())
  264. {
  265. switch(tokenList.getChar())
  266. {
  267. case '+':
  268. case '-':
  269. case '~':
  270. thirdOperand = parseImmediate();
  271. break;
  272. default:
  273. throw Error("Unexpected punctuator after mnemonic '%c'", tokenList.getChar());
  274. }
  275. }
  276. else if(tokenList.isInteger())
  277. {
  278. thirdOperand = parseImmediate();
  279. }
  280. else
  281. {
  282. throw Error("Too many operands");
  283. }
  284. instruction->matchThirdOperand(thirdOperand);
  285. synthesizer.encodeThirdOperand(thirdOperand);
  286. }
  287. OperandIMM Parser::parseImmediate()
  288. {
  289. OperandIMM imm;
  290. if(tokenList.isPunctuator())
  291. {
  292. if(tokenList.isPunctuator('+'))
  293. {
  294. tokenList.advance();
  295. if(tokenList.isInteger())
  296. {
  297. imm.value = +tokenList.getInteger();
  298. }
  299. else if(tokenList.isReal())
  300. {
  301. float real = +tokenList.getReal();
  302. imm.value = *(int*)&real;
  303. }
  304. else
  305. {
  306. throw Error("Unexpected token following '+'");
  307. }
  308. }
  309. else if(tokenList.isPunctuator('-'))
  310. {
  311. tokenList.advance();
  312. if(tokenList.isInteger())
  313. {
  314. imm.value = -tokenList.getInteger();
  315. }
  316. else if(tokenList.isReal())
  317. {
  318. float real = -tokenList.getReal();
  319. imm.value = *(int*)&real;
  320. }
  321. else
  322. {
  323. throw Error("Unexpected token following '-'");
  324. }
  325. }
  326. else if(tokenList.isPunctuator('~'))
  327. {
  328. tokenList.advance();
  329. if(tokenList.isInteger())
  330. {
  331. imm.value = ~tokenList.getInteger();
  332. }
  333. else
  334. {
  335. throw Error("Unexpected token following '~'");
  336. }
  337. }
  338. else
  339. {
  340. throw INTERNAL_ERROR; // Method shouldn't have been called
  341. }
  342. }
  343. else if(tokenList.isInteger())
  344. {
  345. imm.value = tokenList.getInteger();
  346. }
  347. else if(tokenList.isReal())
  348. {
  349. float real = tokenList.getReal();
  350. imm.value = *(int*)&real;
  351. }
  352. else
  353. {
  354. throw INTERNAL_ERROR; // Method shouldn't have been called
  355. }
  356. if((signed char)imm.value == imm.value)
  357. {
  358. imm.type = Operand::OPERAND_EXT8;
  359. }
  360. else if((unsigned char)imm.value == imm.value)
  361. {
  362. imm.type = Operand::OPERAND_IMM8;
  363. }
  364. else if((unsigned short)imm.value == imm.value)
  365. {
  366. imm.type = Operand::OPERAND_IMM16;
  367. }
  368. else
  369. {
  370. imm.type = Operand::OPERAND_IMM32;
  371. }
  372. tokenList.advance();
  373. return imm;
  374. }
  375. OperandSTR Parser::parseLiteral()
  376. {
  377. OperandSTR str(tokenList.getString());
  378. tokenList.advance();
  379. return str;
  380. }
  381. OperandREG Parser::parseRegister()
  382. {
  383. Operand reg = Operand::scanReg(tokenList.getString());
  384. // It's not a register, so it must be a label
  385. if(reg.type == Operand::OPERAND_UNKNOWN)
  386. {
  387. // Operand type should be immediate
  388. reg.type = Operand::OPERAND_IMM;
  389. reg.notation = tokenList.getString();
  390. }
  391. tokenList.advance();
  392. return reg;
  393. }
  394. OperandMEM Parser::parseMemoryReference()
  395. {
  396. OperandMEM mem;
  397. while(!tokenList.lookAhead().isEndOfLine())
  398. {
  399. const Token &prev = tokenList.current();
  400. tokenList.advance();
  401. const Token &next = tokenList.lookAhead();
  402. if(tokenList.isIdentifier())
  403. {
  404. // Try if it's a register
  405. Operand reg = Operand::scanReg(tokenList.getString());
  406. if(reg.type != Operand::OPERAND_UNKNOWN)
  407. {
  408. // Check if this is a scaled index register
  409. if(prev.isPunctuator('*') || next.isPunctuator('*'))
  410. {
  411. mem.indexReg = reg.reg;
  412. }
  413. else
  414. {
  415. mem.baseReg = reg.reg;
  416. }
  417. }
  418. else
  419. {
  420. // Reference to a variable
  421. mem.reference = tokenList.getString();
  422. }
  423. }
  424. else if(tokenList.isPunctuator())
  425. {
  426. switch(tokenList.getChar())
  427. {
  428. case ']':
  429. mem.type = Operand::OPERAND_MEM;
  430. tokenList.advance();
  431. return mem;
  432. case '+':
  433. // Check if previous and next tokens are identifier or number
  434. if((!prev.isInteger() && !prev.isIdentifier()) ||
  435. (!next.isIdentifier() && !next.isInteger() && !next.isPunctuator('-')))
  436. {
  437. throw Error("Syntax error '+' in memory reference");
  438. }
  439. break;
  440. case '-':
  441. // Check if previous and next tokens are correct type
  442. if((!prev.isInteger() && !prev.isIdentifier() && !prev.isPunctuator('[') && !prev.isPunctuator('+')) ||
  443. !next.isInteger())
  444. {
  445. throw Error("Syntax error '-' in memory reference");
  446. }
  447. break;
  448. case '*':
  449. // Check if previous and next tokens are index and scale
  450. if(!(prev.isInteger() && next.isIdentifier()) &&
  451. !(next.isInteger() && prev.isIdentifier()))
  452. {
  453. throw Error("Syntax error '*' in memory reference");
  454. }
  455. break;
  456. default:
  457. throw Error("Unexpected punctuator in memory reference '%c'", tokenList.getChar());
  458. }
  459. }
  460. else if(tokenList.isInteger())
  461. {
  462. if(prev.isPunctuator('*') || next.isPunctuator('*'))
  463. {
  464. if(tokenList.getInteger() == 1 ||
  465. tokenList.getInteger() == 2 ||
  466. tokenList.getInteger() == 4 ||
  467. tokenList.getInteger() == 8)
  468. {
  469. mem.scale = tokenList.getInteger();
  470. }
  471. else
  472. {
  473. throw Error("Invalid scale in memory reference");
  474. }
  475. }
  476. else if(prev.isPunctuator('-'))
  477. {
  478. mem.displacement -= tokenList.getInteger();
  479. }
  480. else if(prev.isPunctuator('+') || next.isPunctuator('+'))
  481. {
  482. mem.displacement += tokenList.getInteger();
  483. }
  484. else // Static address
  485. {
  486. mem.displacement += tokenList.getInteger();
  487. }
  488. }
  489. else
  490. {
  491. throw Error("Unexpected token in memory reference '%s'", tokenList.getString());
  492. }
  493. }
  494. throw Error("Unexpected end of line in memory reference");
  495. }
  496. }