/scripts/kallsyms.c

http://github.com/mirrors/linux · C · 772 lines · 540 code · 133 blank · 99 comment · 145 complexity · 0de6b6b13ba0ab4075ebed932093c9ad MD5 · raw file

  1. /* Generate assembler source containing symbol information
  2. *
  3. * Copyright 2002 by Kai Germaschewski
  4. *
  5. * This software may be used and distributed according to the terms
  6. * of the GNU General Public License, incorporated herein by reference.
  7. *
  8. * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S
  9. *
  10. * Table compression uses all the unused char codes on the symbols and
  11. * maps these to the most used substrings (tokens). For instance, it might
  12. * map char code 0xF7 to represent "write_" and then in every symbol where
  13. * "write_" appears it can be replaced by 0xF7, saving 5 bytes.
  14. * The used codes themselves are also placed in the table so that the
  15. * decompression can work without "special cases".
  16. * Applied to kernel symbols, this usually produces a compression ratio
  17. * of about 50%.
  18. *
  19. */
  20. #include <stdbool.h>
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include <ctype.h>
  25. #include <limits.h>
  /* Number of elements in a true array; do not use on a pointer. */
  #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

  /*
   * Symbol names whose strlen() reaches this value are rejected by
   * read_symbol().
   */
  #define KSYM_NAME_LEN 128
  /*
   * One symbol taken from the input listing.  Entries are heap allocated by
   * read_symbol() with room for the type character, the name and its NUL.
   */
  struct sym_entry {
      /* address parsed from the input line */
      unsigned long long addr;
      /* bytes in use in sym[]; shrinks as tokens replace byte pairs */
      unsigned int len;
      /* table index at read time; last tie-breaker when sorting */
      unsigned int start_pos;
      /* 0 after reading; set to 1 by make_percpus_absolute() */
      unsigned int percpu_absolute;
      /* sym[0] holds the type character, the name starts at sym[1] */
      unsigned char sym[];
  };
  /*
   * An address interval delimited by two named symbols.  The bounds are
   * filled in by check_symbol_range() when those symbols appear in the input.
   */
  struct addr_range {
      const char *start_sym;       /* name of the symbol that sets 'start' */
      const char *end_sym;         /* name of the symbol that sets 'end' */
      unsigned long long start;    /* lower bound, inclusive */
      unsigned long long end;      /* upper bound, inclusive */
  };
  39. static unsigned long long _text;
  40. static unsigned long long relative_base;
  41. static struct addr_range text_ranges[] = {
  42. { "_stext", "_etext" },
  43. { "_sinittext", "_einittext" },
  44. };
  45. #define text_range_text (&text_ranges[0])
  46. #define text_range_inittext (&text_ranges[1])
  47. static struct addr_range percpu_range = {
  48. "__per_cpu_start", "__per_cpu_end", -1ULL, 0
  49. };
  50. static struct sym_entry **table;
  51. static unsigned int table_size, table_cnt;
  52. static int all_symbols;
  53. static int absolute_percpu;
  54. static int base_relative;
  55. static int token_profit[0x10000];
  56. /* the table that holds the result of the compression */
  57. static unsigned char best_table[256][2];
  58. static unsigned char best_table_len[256];
  /*
   * Print the command-line synopsis on stderr and terminate with exit
   * status 1.  Does not return.
   *
   * The synopsis lists all three options that main() accepts.
   */
  static void usage(void)
  {
      fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] "
              "[--base-relative] < in.map > out.S\n");
      exit(1);
  }
  65. static char *sym_name(const struct sym_entry *s)
  66. {
  67. return (char *)s->sym + 1;
  68. }
  /*
   * Decide whether a symbol must be left out of the table.
   *
   * @name: NUL-terminated symbol name
   * @type: nm type character of the symbol
   *
   * Returns true when the name is on the exact-match list, starts with an
   * ignored prefix, ends with an ignored suffix, or when the type is
   * undefined ('U'/'u'), debugging ('N'/'n'), or absolute ('A'/'a') without
   * being one of the few absolute symbols that are kept.
   */
  static bool is_ignored_symbol(const char *name, char type)
  {
      static const char * const ignored_symbols[] = {
          /*
           * Symbols which vary between passes. Passes 1 and 2 must have
           * identical symbol lists. The kallsyms_* symbols below are
           * only added after pass 1, they would be included in pass 2
           * when --all-symbols is specified so exclude them to get a
           * stable symbol list.
           */
          "kallsyms_addresses",
          "kallsyms_offsets",
          "kallsyms_relative_base",
          "kallsyms_num_syms",
          "kallsyms_names",
          "kallsyms_markers",
          "kallsyms_token_table",
          "kallsyms_token_index",
          /* Exclude linker generated symbols which vary between passes */
          "_SDA_BASE_",           /* ppc */
          "_SDA2_BASE_",          /* ppc */
          NULL
      };

      static const char * const ignored_prefixes[] = {
          "$",                    /* local symbols for ARM, MIPS, etc. */
          ".LASANPC",             /* s390 kasan local symbols */
          "__crc_",               /* modversions */
          "__efistub_",           /* arm64 EFI stub namespace */
          NULL
      };

      static const char * const ignored_suffixes[] = {
          "_from_arm",            /* arm */
          "_from_thumb",          /* arm */
          "_veneer",              /* arm */
          NULL
      };

      const char * const *p;
      const size_t name_len = strlen(name);

      /* Exclude symbols which vary between passes. */
      for (p = ignored_symbols; *p; p++) {
          if (!strcmp(name, *p))
              return true;
      }

      for (p = ignored_prefixes; *p; p++) {
          if (!strncmp(name, *p, strlen(*p)))
              return true;
      }

      for (p = ignored_suffixes; *p; p++) {
          const size_t suffix_len = strlen(*p);

          /* compare lengths in size_t; no narrowing of the difference */
          if (name_len >= suffix_len &&
              !strcmp(name + (name_len - suffix_len), *p))
              return true;
      }

      if (type == 'U' || type == 'u')
          return true;

      /* exclude debugging symbols */
      if (type == 'N' || type == 'n')
          return true;

      /* <ctype.h> functions require a value representable as unsigned char */
      if (toupper((unsigned char)type) == 'A') {
          /* Keep these useful absolute symbols */
          if (strcmp(name, "__kernel_syscall_via_break") &&
              strcmp(name, "__kernel_syscall_via_epc") &&
              strcmp(name, "__kernel_sigtramp") &&
              strcmp(name, "__gp"))
              return true;
      }

      return false;
  }
  133. static void check_symbol_range(const char *sym, unsigned long long addr,
  134. struct addr_range *ranges, int entries)
  135. {
  136. size_t i;
  137. struct addr_range *ar;
  138. for (i = 0; i < entries; ++i) {
  139. ar = &ranges[i];
  140. if (strcmp(sym, ar->start_sym) == 0) {
  141. ar->start = addr;
  142. return;
  143. } else if (strcmp(sym, ar->end_sym) == 0) {
  144. ar->end = addr;
  145. return;
  146. }
  147. }
  148. }
  149. static struct sym_entry *read_symbol(FILE *in)
  150. {
  151. char name[500], type;
  152. unsigned long long addr;
  153. unsigned int len;
  154. struct sym_entry *sym;
  155. int rc;
  156. rc = fscanf(in, "%llx %c %499s\n", &addr, &type, name);
  157. if (rc != 3) {
  158. if (rc != EOF && fgets(name, 500, in) == NULL)
  159. fprintf(stderr, "Read error or end of file.\n");
  160. return NULL;
  161. }
  162. if (strlen(name) >= KSYM_NAME_LEN) {
  163. fprintf(stderr, "Symbol %s too long for kallsyms (%zu >= %d).\n"
  164. "Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n",
  165. name, strlen(name), KSYM_NAME_LEN);
  166. return NULL;
  167. }
  168. if (strcmp(name, "_text") == 0)
  169. _text = addr;
  170. /* Ignore most absolute/undefined (?) symbols. */
  171. if (is_ignored_symbol(name, type))
  172. return NULL;
  173. check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges));
  174. check_symbol_range(name, addr, &percpu_range, 1);
  175. /* include the type field in the symbol name, so that it gets
  176. * compressed together */
  177. len = strlen(name) + 1;
  178. sym = malloc(sizeof(*sym) + len + 1);
  179. if (!sym) {
  180. fprintf(stderr, "kallsyms failure: "
  181. "unable to allocate required amount of memory\n");
  182. exit(EXIT_FAILURE);
  183. }
  184. sym->addr = addr;
  185. sym->len = len;
  186. sym->sym[0] = type;
  187. strcpy(sym_name(sym), name);
  188. sym->percpu_absolute = 0;
  189. return sym;
  190. }
  191. static int symbol_in_range(const struct sym_entry *s,
  192. const struct addr_range *ranges, int entries)
  193. {
  194. size_t i;
  195. const struct addr_range *ar;
  196. for (i = 0; i < entries; ++i) {
  197. ar = &ranges[i];
  198. if (s->addr >= ar->start && s->addr <= ar->end)
  199. return 1;
  200. }
  201. return 0;
  202. }
  203. static int symbol_valid(const struct sym_entry *s)
  204. {
  205. const char *name = sym_name(s);
  206. /* if --all-symbols is not specified, then symbols outside the text
  207. * and inittext sections are discarded */
  208. if (!all_symbols) {
  209. if (symbol_in_range(s, text_ranges,
  210. ARRAY_SIZE(text_ranges)) == 0)
  211. return 0;
  212. /* Corner case. Discard any symbols with the same value as
  213. * _etext _einittext; they can move between pass 1 and 2 when
  214. * the kallsyms data are added. If these symbols move then
  215. * they may get dropped in pass 2, which breaks the kallsyms
  216. * rules.
  217. */
  218. if ((s->addr == text_range_text->end &&
  219. strcmp(name, text_range_text->end_sym)) ||
  220. (s->addr == text_range_inittext->end &&
  221. strcmp(name, text_range_inittext->end_sym)))
  222. return 0;
  223. }
  224. return 1;
  225. }
  226. /* remove all the invalid symbols from the table */
  227. static void shrink_table(void)
  228. {
  229. unsigned int i, pos;
  230. pos = 0;
  231. for (i = 0; i < table_cnt; i++) {
  232. if (symbol_valid(table[i])) {
  233. if (pos != i)
  234. table[pos] = table[i];
  235. pos++;
  236. } else {
  237. free(table[i]);
  238. }
  239. }
  240. table_cnt = pos;
  241. /* When valid symbol is not registered, exit to error */
  242. if (!table_cnt) {
  243. fprintf(stderr, "No valid symbol.\n");
  244. exit(1);
  245. }
  246. }
  247. static void read_map(FILE *in)
  248. {
  249. struct sym_entry *sym;
  250. while (!feof(in)) {
  251. sym = read_symbol(in);
  252. if (!sym)
  253. continue;
  254. sym->start_pos = table_cnt;
  255. if (table_cnt >= table_size) {
  256. table_size += 10000;
  257. table = realloc(table, sizeof(*table) * table_size);
  258. if (!table) {
  259. fprintf(stderr, "out of memory\n");
  260. exit (1);
  261. }
  262. }
  263. table[table_cnt++] = sym;
  264. }
  265. }
  /*
   * Emit a global, aligned label definition for 'label' on stdout.
   * ALGN is a macro defined in the preamble printed by write_src().
   */
  static void output_label(const char *label)
  {
      fprintf(stdout, ".globl %s\n", label);
      fputs("\tALGN\n", stdout);
      fprintf(stdout, "%s:\n", label);
  }
  272. /* Provide proper symbols relocatability by their '_text' relativeness. */
  273. static void output_address(unsigned long long addr)
  274. {
  275. if (_text <= addr)
  276. printf("\tPTR\t_text + %#llx\n", addr - _text);
  277. else
  278. printf("\tPTR\t_text - %#llx\n", _text - addr);
  279. }
  280. /* uncompress a compressed symbol. When this function is called, the best table
  281. * might still be compressed itself, so the function needs to be recursive */
  282. static int expand_symbol(const unsigned char *data, int len, char *result)
  283. {
  284. int c, rlen, total=0;
  285. while (len) {
  286. c = *data;
  287. /* if the table holds a single char that is the same as the one
  288. * we are looking for, then end the search */
  289. if (best_table[c][0]==c && best_table_len[c]==1) {
  290. *result++ = c;
  291. total++;
  292. } else {
  293. /* if not, recurse and expand */
  294. rlen = expand_symbol(best_table[c], best_table_len[c], result);
  295. total += rlen;
  296. result += rlen;
  297. }
  298. data++;
  299. len--;
  300. }
  301. *result=0;
  302. return total;
  303. }
  304. static int symbol_absolute(const struct sym_entry *s)
  305. {
  306. return s->percpu_absolute;
  307. }
  308. static void write_src(void)
  309. {
  310. unsigned int i, k, off;
  311. unsigned int best_idx[256];
  312. unsigned int *markers;
  313. char buf[KSYM_NAME_LEN];
  314. printf("#include <asm/bitsperlong.h>\n");
  315. printf("#if BITS_PER_LONG == 64\n");
  316. printf("#define PTR .quad\n");
  317. printf("#define ALGN .balign 8\n");
  318. printf("#else\n");
  319. printf("#define PTR .long\n");
  320. printf("#define ALGN .balign 4\n");
  321. printf("#endif\n");
  322. printf("\t.section .rodata, \"a\"\n");
  323. if (!base_relative)
  324. output_label("kallsyms_addresses");
  325. else
  326. output_label("kallsyms_offsets");
  327. for (i = 0; i < table_cnt; i++) {
  328. if (base_relative) {
  329. /*
  330. * Use the offset relative to the lowest value
  331. * encountered of all relative symbols, and emit
  332. * non-relocatable fixed offsets that will be fixed
  333. * up at runtime.
  334. */
  335. long long offset;
  336. int overflow;
  337. if (!absolute_percpu) {
  338. offset = table[i]->addr - relative_base;
  339. overflow = (offset < 0 || offset > UINT_MAX);
  340. } else if (symbol_absolute(table[i])) {
  341. offset = table[i]->addr;
  342. overflow = (offset < 0 || offset > INT_MAX);
  343. } else {
  344. offset = relative_base - table[i]->addr - 1;
  345. overflow = (offset < INT_MIN || offset >= 0);
  346. }
  347. if (overflow) {
  348. fprintf(stderr, "kallsyms failure: "
  349. "%s symbol value %#llx out of range in relative mode\n",
  350. symbol_absolute(table[i]) ? "absolute" : "relative",
  351. table[i]->addr);
  352. exit(EXIT_FAILURE);
  353. }
  354. printf("\t.long\t%#x\n", (int)offset);
  355. } else if (!symbol_absolute(table[i])) {
  356. output_address(table[i]->addr);
  357. } else {
  358. printf("\tPTR\t%#llx\n", table[i]->addr);
  359. }
  360. }
  361. printf("\n");
  362. if (base_relative) {
  363. output_label("kallsyms_relative_base");
  364. output_address(relative_base);
  365. printf("\n");
  366. }
  367. output_label("kallsyms_num_syms");
  368. printf("\t.long\t%u\n", table_cnt);
  369. printf("\n");
  370. /* table of offset markers, that give the offset in the compressed stream
  371. * every 256 symbols */
  372. markers = malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256));
  373. if (!markers) {
  374. fprintf(stderr, "kallsyms failure: "
  375. "unable to allocate required memory\n");
  376. exit(EXIT_FAILURE);
  377. }
  378. output_label("kallsyms_names");
  379. off = 0;
  380. for (i = 0; i < table_cnt; i++) {
  381. if ((i & 0xFF) == 0)
  382. markers[i >> 8] = off;
  383. printf("\t.byte 0x%02x", table[i]->len);
  384. for (k = 0; k < table[i]->len; k++)
  385. printf(", 0x%02x", table[i]->sym[k]);
  386. printf("\n");
  387. off += table[i]->len + 1;
  388. }
  389. printf("\n");
  390. output_label("kallsyms_markers");
  391. for (i = 0; i < ((table_cnt + 255) >> 8); i++)
  392. printf("\t.long\t%u\n", markers[i]);
  393. printf("\n");
  394. free(markers);
  395. output_label("kallsyms_token_table");
  396. off = 0;
  397. for (i = 0; i < 256; i++) {
  398. best_idx[i] = off;
  399. expand_symbol(best_table[i], best_table_len[i], buf);
  400. printf("\t.asciz\t\"%s\"\n", buf);
  401. off += strlen(buf) + 1;
  402. }
  403. printf("\n");
  404. output_label("kallsyms_token_index");
  405. for (i = 0; i < 256; i++)
  406. printf("\t.short\t%d\n", best_idx[i]);
  407. printf("\n");
  408. }
  409. /* table lookup compression functions */
  410. /* count all the possible tokens in a symbol */
  411. static void learn_symbol(const unsigned char *symbol, int len)
  412. {
  413. int i;
  414. for (i = 0; i < len - 1; i++)
  415. token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
  416. }
  417. /* decrease the count for all the possible tokens in a symbol */
  418. static void forget_symbol(const unsigned char *symbol, int len)
  419. {
  420. int i;
  421. for (i = 0; i < len - 1; i++)
  422. token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
  423. }
  424. /* do the initial token count */
  425. static void build_initial_tok_table(void)
  426. {
  427. unsigned int i;
  428. for (i = 0; i < table_cnt; i++)
  429. learn_symbol(table[i]->sym, table[i]->len);
  430. }
  /*
   * Find the first occurrence of the two-byte 'token' within the first
   * 'len' bytes of 'str'.
   *
   * Returns a pointer to the first byte of the match, or NULL if the pair
   * does not occur.
   */
  static unsigned char *find_token(unsigned char *str, int len,
                                   const unsigned char *token)
  {
      int pos;

      for (pos = 1; pos < len; pos++) {
          if (str[pos - 1] != token[0])
              continue;
          if (str[pos] != token[1])
              continue;
          return str + pos - 1;
      }

      return NULL;
  }
  441. /* replace a given token in all the valid symbols. Use the sampled symbols
  442. * to update the counts */
  443. static void compress_symbols(const unsigned char *str, int idx)
  444. {
  445. unsigned int i, len, size;
  446. unsigned char *p1, *p2;
  447. for (i = 0; i < table_cnt; i++) {
  448. len = table[i]->len;
  449. p1 = table[i]->sym;
  450. /* find the token on the symbol */
  451. p2 = find_token(p1, len, str);
  452. if (!p2) continue;
  453. /* decrease the counts for this symbol's tokens */
  454. forget_symbol(table[i]->sym, len);
  455. size = len;
  456. do {
  457. *p2 = idx;
  458. p2++;
  459. size -= (p2 - p1);
  460. memmove(p2, p2 + 1, size);
  461. p1 = p2;
  462. len--;
  463. if (size < 2) break;
  464. /* find the token on the symbol */
  465. p2 = find_token(p1, size, str);
  466. } while (p2);
  467. table[i]->len = len;
  468. /* increase the counts for this symbol's new tokens */
  469. learn_symbol(table[i]->sym, len);
  470. }
  471. }
  472. /* search the token with the maximum profit */
  473. static int find_best_token(void)
  474. {
  475. int i, best, bestprofit;
  476. bestprofit=-10000;
  477. best = 0;
  478. for (i = 0; i < 0x10000; i++) {
  479. if (token_profit[i] > bestprofit) {
  480. best = i;
  481. bestprofit = token_profit[i];
  482. }
  483. }
  484. return best;
  485. }
  486. /* this is the core of the algorithm: calculate the "best" table */
  487. static void optimize_result(void)
  488. {
  489. int i, best;
  490. /* using the '\0' symbol last allows compress_symbols to use standard
  491. * fast string functions */
  492. for (i = 255; i >= 0; i--) {
  493. /* if this table slot is empty (it is not used by an actual
  494. * original char code */
  495. if (!best_table_len[i]) {
  496. /* find the token with the best profit value */
  497. best = find_best_token();
  498. if (token_profit[best] == 0)
  499. break;
  500. /* place it in the "best" table */
  501. best_table_len[i] = 2;
  502. best_table[i][0] = best & 0xFF;
  503. best_table[i][1] = (best >> 8) & 0xFF;
  504. /* replace this token in all the valid symbols */
  505. compress_symbols(best_table[i], i);
  506. }
  507. }
  508. }
  509. /* start by placing the symbols that are actually used on the table */
  510. static void insert_real_symbols_in_table(void)
  511. {
  512. unsigned int i, j, c;
  513. for (i = 0; i < table_cnt; i++) {
  514. for (j = 0; j < table[i]->len; j++) {
  515. c = table[i]->sym[j];
  516. best_table[c][0]=c;
  517. best_table_len[c]=1;
  518. }
  519. }
  520. }
  /*
   * Run the compression: count the byte pairs, reserve the codes used by
   * real characters, then assign tokens to the remaining codes.
   */
  static void optimize_token_table(void)
  {
      build_initial_tok_table();

      insert_real_symbols_in_table();

      optimize_result();
  }
  527. /* guess for "linker script provide" symbol */
  528. static int may_be_linker_script_provide_symbol(const struct sym_entry *se)
  529. {
  530. const char *symbol = sym_name(se);
  531. int len = se->len - 1;
  532. if (len < 8)
  533. return 0;
  534. if (symbol[0] != '_' || symbol[1] != '_')
  535. return 0;
  536. /* __start_XXXXX */
  537. if (!memcmp(symbol + 2, "start_", 6))
  538. return 1;
  539. /* __stop_XXXXX */
  540. if (!memcmp(symbol + 2, "stop_", 5))
  541. return 1;
  542. /* __end_XXXXX */
  543. if (!memcmp(symbol + 2, "end_", 4))
  544. return 1;
  545. /* __XXXXX_start */
  546. if (!memcmp(symbol + len - 6, "_start", 6))
  547. return 1;
  548. /* __XXXXX_end */
  549. if (!memcmp(symbol + len - 4, "_end", 4))
  550. return 1;
  551. return 0;
  552. }
  553. static int compare_symbols(const void *a, const void *b)
  554. {
  555. const struct sym_entry *sa = *(const struct sym_entry **)a;
  556. const struct sym_entry *sb = *(const struct sym_entry **)b;
  557. int wa, wb;
  558. /* sort by address first */
  559. if (sa->addr > sb->addr)
  560. return 1;
  561. if (sa->addr < sb->addr)
  562. return -1;
  563. /* sort by "weakness" type */
  564. wa = (sa->sym[0] == 'w') || (sa->sym[0] == 'W');
  565. wb = (sb->sym[0] == 'w') || (sb->sym[0] == 'W');
  566. if (wa != wb)
  567. return wa - wb;
  568. /* sort by "linker script provide" type */
  569. wa = may_be_linker_script_provide_symbol(sa);
  570. wb = may_be_linker_script_provide_symbol(sb);
  571. if (wa != wb)
  572. return wa - wb;
  573. /* sort by the number of prefix underscores */
  574. wa = strspn(sym_name(sa), "_");
  575. wb = strspn(sym_name(sb), "_");
  576. if (wa != wb)
  577. return wa - wb;
  578. /* sort by initial order, so that other symbols are left undisturbed */
  579. return sa->start_pos - sb->start_pos;
  580. }
  581. static void sort_symbols(void)
  582. {
  583. qsort(table, table_cnt, sizeof(table[0]), compare_symbols);
  584. }
  585. static void make_percpus_absolute(void)
  586. {
  587. unsigned int i;
  588. for (i = 0; i < table_cnt; i++)
  589. if (symbol_in_range(table[i], &percpu_range, 1)) {
  590. /*
  591. * Keep the 'A' override for percpu symbols to
  592. * ensure consistent behavior compared to older
  593. * versions of this tool.
  594. */
  595. table[i]->sym[0] = 'A';
  596. table[i]->percpu_absolute = 1;
  597. }
  598. }
  599. /* find the minimum non-absolute symbol address */
  600. static void record_relative_base(void)
  601. {
  602. unsigned int i;
  603. for (i = 0; i < table_cnt; i++)
  604. if (!symbol_absolute(table[i])) {
  605. /*
  606. * The table is sorted by address.
  607. * Take the first non-absolute symbol value.
  608. */
  609. relative_base = table[i]->addr;
  610. return;
  611. }
  612. }
  613. int main(int argc, char **argv)
  614. {
  615. if (argc >= 2) {
  616. int i;
  617. for (i = 1; i < argc; i++) {
  618. if(strcmp(argv[i], "--all-symbols") == 0)
  619. all_symbols = 1;
  620. else if (strcmp(argv[i], "--absolute-percpu") == 0)
  621. absolute_percpu = 1;
  622. else if (strcmp(argv[i], "--base-relative") == 0)
  623. base_relative = 1;
  624. else
  625. usage();
  626. }
  627. } else if (argc != 1)
  628. usage();
  629. read_map(stdin);
  630. shrink_table();
  631. if (absolute_percpu)
  632. make_percpus_absolute();
  633. sort_symbols();
  634. if (base_relative)
  635. record_relative_base();
  636. optimize_token_table();
  637. write_src();
  638. return 0;
  639. }