kallsyms.c source code [linux/scripts/kallsyms.c]

1	/ Generate assembler source containing symbol information*
2	*
3	* Copyright 2002 by Kai Germaschewski
4	*
5	* This software may be used and distributed according to the terms
6	* of the GNU General Public License, incorporated herein by reference.
7	*
8	* Usage: kallsyms [--all-symbols] in.map > out.S
9	*
10	* Table compression uses all the unused char codes on the symbols and
11	* maps these to the most used substrings (tokens). For instance, it might
12	* map char code 0xF7 to represent "write_" and then in every symbol where
13	* "write_" appears it can be replaced by 0xF7, saving 5 bytes.
14	* The used codes themselves are also placed in the table so that the
15	* decompresion can work without "special cases".
16	* Applied to kernel symbols, this usually produces a compression ratio
17	* of about 50%.
18	*
19	*/
20
21	#include <errno.h>
22	#include <getopt.h>
23	#include <stdbool.h>
24	#include <stdio.h>
25	#include <stdlib.h>
26	#include <string.h>
27	#include <ctype.h>
28	#include <limits.h>
29
30	#include <xalloc.h>
31
32	#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
33
34	#define KSYM_NAME_LEN 512
35
36	struct sym_entry {
37	unsigned long long addr;
38	unsigned int len;
39	unsigned int seq;
40	unsigned char sym[];
41	};
42
43	struct addr_range {
44	const char start_sym, end_sym;
45	unsigned long long start, end;
46	};
47
48	static unsigned long long _text;
49	static unsigned long long relative_base;
50	static struct addr_range text_ranges[] = {
51	{ "_stext", "_etext" },
52	{ "_sinittext", "_einittext" },
53	};
54	#define text_range_text (&text_ranges[0])
55	#define text_range_inittext (&text_ranges[1])
56
57	static struct sym_entry **table;
58	static unsigned int table_size, table_cnt;
59	static int all_symbols;
60
61	static int token_profit[`0x10000`];
62
63	/ the table that holds the result of the compression /
64	static unsigned char best_table[`256`][`2`];
65	static unsigned char best_table_len[`256`];
66
67
68	static void usage(void)
69	{
70	fprintf(stderr, format: "Usage: kallsyms [--all-symbols] in.map > out.S\n");
71	exit(status: `1`);
72	}
73
74	static char sym_name(const* struct sym_entry *s)
75	{
76	return (char *)s->sym + `1`;
77	}
78
79	static bool is_ignored_symbol(const char name, char* type)
80	{
81	if (type == `'u'` \|\| type == `'n'`)
82	return true;
83
84	if (toupper(type) == `'A'`) {
85	/ Keep these useful absolute symbols /
86	if (strcmp(s1: name, s2: "__kernel_syscall_via_break") &&
87	strcmp(s1: name, s2: "__kernel_syscall_via_epc") &&
88	strcmp(s1: name, s2: "__kernel_sigtramp") &&
89	strcmp(s1: name, s2: "__gp"))
90	return true;
91	}
92
93	return false;
94	}
95
96	static void check_symbol_range(const char sym, unsigned* long long addr,
97	struct addr_range ranges, int* entries)
98	{
99	size_t i;
100	struct addr_range *ar;
101
102	for (i = `0`; i < entries; ++i) {
103	ar = &ranges[i];
104
105	if (strcmp(s1: sym, s2: ar->start_sym) == `0`) {
106	ar->start = addr;
107	return;
108	} else if (strcmp(s1: sym, s2: ar->end_sym) == `0`) {
109	ar->end = addr;
110	return;
111	}
112	}
113	}
114
115	static struct sym_entry read_symbol(FILE in, char *buf, size_t buf_len)
116	{
117	char name, type, p;
118	unsigned long long addr;
119	size_t len;
120	ssize_t readlen;
121	struct sym_entry *sym;
122
123	errno = `0`;
124	readlen = getline(lineptr: buf, n: buf_len, stream: in);
125	if (readlen < `0`) {
126	if (errno) {
127	perror(s: "read_symbol");
128	exit(EXIT_FAILURE);
129	}
130	return NULL;
131	}
132
133	if ((*buf)[readlen - `1`] == `'\n'`)
134	(*buf)[readlen - `1`] = `0`;
135
136	addr = strtoull(nptr: *buf, endptr: &p, base: `16`);
137
138	if (buf == p \|\| p++ != `' '` \|\| !isascii((type = p++)) \|\| p++ != `' '`) {
139	fprintf(stderr, format: "line format error\n");
140	exit(EXIT_FAILURE);
141	}
142
143	name = p;
144	len = strlen(s: name);
145
146	if (len >= KSYM_NAME_LEN) {
147	fprintf(stderr, format: "Symbol %s too long for kallsyms (%zu >= %d).\n"
148	"Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n",
149	name, len, KSYM_NAME_LEN);
150	return NULL;
151	}
152
153	if (strcmp(s1: name, s2: "_text") == `0`)
154	_text = addr;
155
156	/ Ignore most absolute/undefined (?) symbols. /
157	if (is_ignored_symbol(name, type))
158	return NULL;
159
160	check_symbol_range(sym: name, addr, ranges: text_ranges, ARRAY_SIZE(text_ranges));
161
162	/ include the type field in the symbol name, so that it gets*
163	* compressed together */
164	len++;
165
166	sym = xmalloc(sizeof(*sym) + len + `1`);
167	sym->addr = addr;
168	sym->len = len;
169	sym->sym[`0`] = type;
170	strcpy(dest: sym_name(s: sym), src: name);
171
172	return sym;
173	}
174
175	static int symbol_in_range(const struct sym_entry *s,
176	const struct addr_range ranges, int* entries)
177	{
178	size_t i;
179	const struct addr_range *ar;
180
181	for (i = `0`; i < entries; ++i) {
182	ar = &ranges[i];
183
184	if (s->addr >= ar->start && s->addr <= ar->end)
185	return `1`;
186	}
187
188	return `0`;
189	}
190
191	static bool string_starts_with(const char s, const* char *prefix)
192	{
193	return strncmp(s1: s, s2: prefix, n: strlen(s: prefix)) == `0`;
194	}
195
196	static int symbol_valid(const struct sym_entry *s)
197	{
198	const char *name = sym_name(s);
199
200	/ if --all-symbols is not specified, then symbols outside the text*
201	* and inittext sections are discarded */
202	if (!all_symbols) {
203	/*
204	* Symbols starting with __start and __stop are used to denote
205	* section boundaries, and should always be included:
206	*/
207	if (string_starts_with(s: name, prefix: "__start_") \|\|
208	string_starts_with(s: name, prefix: "__stop_"))
209	return `1`;
210
211	if (symbol_in_range(s, ranges: text_ranges,
212	ARRAY_SIZE(text_ranges)) == `0`)
213	return `0`;
214	/ Corner case. Discard any symbols with the same value as*
215	* _etext _einittext; they can move between pass 1 and 2 when
216	* the kallsyms data are added. If these symbols move then
217	* they may get dropped in pass 2, which breaks the kallsyms
218	* rules.
219	*/
220	if ((s->addr == text_range_text->end &&
221	strcmp(s1: name, text_range_text->end_sym)) \|\|
222	(s->addr == text_range_inittext->end &&
223	strcmp(s1: name, text_range_inittext->end_sym)))
224	return `0`;
225	}
226
227	return `1`;
228	}
229
230	/ remove all the invalid symbols from the table /
231	static void shrink_table(void)
232	{
233	unsigned int i, pos;
234
235	pos = `0`;
236	for (i = `0`; i < table_cnt; i++) {
237	if (symbol_valid(s: table[i])) {
238	if (pos != i)
239	table[pos] = table[i];
240	pos++;
241	} else {
242	free(ptr: table[i]);
243	}
244	}
245	table_cnt = pos;
246	}
247
248	static void read_map(const char *in)
249	{
250	FILE *fp;
251	struct sym_entry *sym;
252	char *buf = NULL;
253	size_t buflen = `0`;
254
255	fp = fopen(filename: in, modes: "r");
256	if (!fp) {
257	perror(s: in);
258	exit(status: `1`);
259	}
260
261	while (!feof(stream: fp)) {
262	sym = read_symbol(in: fp, buf: &buf, buf_len: &buflen);
263	if (!sym)
264	continue;
265
266	sym->seq = table_cnt;
267
268	if (table_cnt >= table_size) {
269	table_size += `10000`;
270	table = xrealloc(table, sizeof(table) table_size);
271	}
272
273	table[table_cnt++] = sym;
274	}
275
276	free(ptr: buf);
277	fclose(stream: fp);
278	}
279
280	static void output_label(const char *label)
281	{
282	printf(format: ".globl %s\n", label);
283	printf(format: "\tALGN\n");
284	printf(format: "%s:\n", label);
285	}
286
287	/ uncompress a compressed symbol. When this function is called, the best table*
288	* might still be compressed itself, so the function needs to be recursive */
289	static int expand_symbol(const unsigned char data, int* len, char *result)
290	{
291	int c, rlen, total=`0`;
292
293	while (len) {
294	c = *data;
295	/ if the table holds a single char that is the same as the one*
296	* we are looking for, then end the search */
297	if (best_table[c][`0`]==c && best_table_len[c]==`1`) {
298	*result++ = c;
299	total++;
300	} else {
301	/ if not, recurse and expand /
302	rlen = expand_symbol(data: best_table[c], len: best_table_len[c], result);
303	total += rlen;
304	result += rlen;
305	}
306	data++;
307	len--;
308	}
309	*result=`0`;
310
311	return total;
312	}
313
314	static int compare_names(const void a, const* void *b)
315	{
316	int ret;
317	const struct sym_entry sa = (const struct sym_entry **)a;
318	const struct sym_entry sb = (const struct sym_entry **)b;
319
320	ret = strcmp(s1: sym_name(s: sa), s2: sym_name(s: sb));
321	if (!ret) {
322	if (sa->addr > sb->addr)
323	return `1`;
324	else if (sa->addr < sb->addr)
325	return -`1`;
326
327	/ keep old order /
328	return (int)(sa->seq - sb->seq);
329	}
330
331	return ret;
332	}
333
334	static void sort_symbols_by_name(void)
335	{
336	qsort(base: table, nmemb: table_cnt, size: sizeof(table[`0`]), compar: compare_names);
337	}
338
339	static void write_src(void)
340	{
341	unsigned int i, k, off;
342	unsigned int best_idx[`256`];
343	unsigned int *markers, markers_cnt;
344	char buf[KSYM_NAME_LEN];
345
346	printf(format: "#include <asm/bitsperlong.h>\n");
347	printf(format: "#if BITS_PER_LONG == 64\n");
348	printf(format: "#define PTR .quad\n");
349	printf(format: "#define ALGN .balign 8\n");
350	printf(format: "#else\n");
351	printf(format: "#define PTR .long\n");
352	printf(format: "#define ALGN .balign 4\n");
353	printf(format: "#endif\n");
354
355	printf(format: "\t.section .rodata, \"a\"\n");
356
357	output_label(label: "kallsyms_num_syms");
358	printf(format: "\t.long\t%u\n", table_cnt);
359	printf(format: "\n");
360
361	/ table of offset markers, that give the offset in the compressed stream*
362	* every 256 symbols */
363	markers_cnt = (table_cnt + `255`) / `256`;
364	markers = xmalloc(sizeof(markers) markers_cnt);
365
366	output_label(label: "kallsyms_names");
367	off = `0`;
368	for (i = `0`; i < table_cnt; i++) {
369	if ((i & `0xFF`) == `0`)
370	markers[i >> `8`] = off;
371	table[i]->seq = i;
372
373	/ There cannot be any symbol of length zero. /
374	if (table[i]->len == `0`) {
375	fprintf(stderr, format: "kallsyms failure: "
376	"unexpected zero symbol length\n");
377	exit(EXIT_FAILURE);
378	}
379
380	/ Only lengths that fit in up-to-two-byte ULEB128 are supported. /
381	if (table[i]->len > `0x3FFF`) {
382	fprintf(stderr, format: "kallsyms failure: "
383	"unexpected huge symbol length\n");
384	exit(EXIT_FAILURE);
385	}
386
387	/ Encode length with ULEB128. /
388	if (table[i]->len <= `0x7F`) {
389	/ Most symbols use a single byte for the length. /
390	printf(format: "\t.byte 0x%02x", table[i]->len);
391	off += table[i]->len + `1`;
392	} else {
393	/ "Big" symbols use two bytes. /
394	printf(format: "\t.byte 0x%02x, 0x%02x",
395	(table[i]->len & `0x7F`) \| `0x80`,
396	(table[i]->len >> `7`) & `0x7F`);
397	off += table[i]->len + `2`;
398	}
399	for (k = `0`; k < table[i]->len; k++)
400	printf(format: ", 0x%02x", table[i]->sym[k]);
401
402	/*
403	* Now that we wrote out the compressed symbol name, restore the
404	* original name and print it in the comment.
405	*/
406	expand_symbol(data: table[i]->sym, len: table[i]->len, result: buf);
407	strcpy(dest: (char *)table[i]->sym, src: buf);
408	printf(format: "\t/* %s */\n", table[i]->sym);
409	}
410	printf(format: "\n");
411
412	output_label(label: "kallsyms_markers");
413	for (i = `0`; i < markers_cnt; i++)
414	printf(format: "\t.long\t%u\n", markers[i]);
415	printf(format: "\n");
416
417	free(ptr: markers);
418
419	output_label(label: "kallsyms_token_table");
420	off = `0`;
421	for (i = `0`; i < `256`; i++) {
422	best_idx[i] = off;
423	expand_symbol(data: best_table[i], len: best_table_len[i], result: buf);
424	printf(format: "\t.asciz\t\"%s\"\n", buf);
425	off += strlen(s: buf) + `1`;
426	}
427	printf(format: "\n");
428
429	output_label(label: "kallsyms_token_index");
430	for (i = `0`; i < `256`; i++)
431	printf(format: "\t.short\t%d\n", best_idx[i]);
432	printf(format: "\n");
433
434	output_label(label: "kallsyms_offsets");
435
436	for (i = `0`; i < table_cnt; i++) {
437	/*
438	* Use the offset relative to the lowest value
439	* encountered of all relative symbols, and emit
440	* non-relocatable fixed offsets that will be fixed
441	* up at runtime.
442	*/
443
444	long long offset;
445
446	offset = table[i]->addr - relative_base;
447	if (offset < `0` \|\| offset > UINT_MAX) {
448	fprintf(stderr, format: "kallsyms failure: "
449	"relative symbol value %#llx out of range\n",
450	table[i]->addr);
451	exit(EXIT_FAILURE);
452	}
453	printf(format: "\t.long\t%#x\t/* %s /\n", (int*)offset, table[i]->sym);
454	}
455	printf(format: "\n");
456
457	output_label(label: "kallsyms_relative_base");
458	/ Provide proper symbols relocatability by their '_text' relativeness. /
459	if (_text <= relative_base)
460	printf(format: "\tPTR\t_text + %#llx\n", relative_base - _text);
461	else
462	printf(format: "\tPTR\t_text - %#llx\n", _text - relative_base);
463	printf(format: "\n");
464
465	sort_symbols_by_name();
466	output_label(label: "kallsyms_seqs_of_names");
467	for (i = `0`; i < table_cnt; i++)
468	printf(format: "\t.byte 0x%02x, 0x%02x, 0x%02x\t/* %s */\n",
469	(unsigned char)(table[i]->seq >> `16`),
470	(unsigned char)(table[i]->seq >> `8`),
471	(unsigned char)(table[i]->seq >> `0`),
472	table[i]->sym);
473	printf(format: "\n");
474	}
475
476
477	/ table lookup compression functions /
478
479	/ count all the possible tokens in a symbol /
480	static void learn_symbol(const unsigned char symbol, int* len)
481	{
482	int i;
483
484	for (i = `0`; i < len - `1`; i++)
485	token_profit[ symbol[i] + (symbol[i + `1`] << `8`) ]++;
486	}
487
488	/ decrease the count for all the possible tokens in a symbol /
489	static void forget_symbol(const unsigned char symbol, int* len)
490	{
491	int i;
492
493	for (i = `0`; i < len - `1`; i++)
494	token_profit[ symbol[i] + (symbol[i + `1`] << `8`) ]--;
495	}
496
497	/ do the initial token count /
498	static void build_initial_token_table(void)
499	{
500	unsigned int i;
501
502	for (i = `0`; i < table_cnt; i++)
503	learn_symbol(symbol: table[i]->sym, len: table[i]->len);
504	}
505
506	static unsigned char find_token(unsigned* char str, int* len,
507	const unsigned char *token)
508	{
509	int i;
510
511	for (i = `0`; i < len - `1`; i++) {
512	if (str[i] == token[`0`] && str[i+`1`] == token[`1`])
513	return &str[i];
514	}
515	return NULL;
516	}
517
518	/ replace a given token in all the valid symbols. Use the sampled symbols*
519	* to update the counts */
520	static void compress_symbols(const unsigned char str, int* idx)
521	{
522	unsigned int i, len, size;
523	unsigned char p1, p2;
524
525	for (i = `0`; i < table_cnt; i++) {
526
527	len = table[i]->len;
528	p1 = table[i]->sym;
529
530	/ find the token on the symbol /
531	p2 = find_token(str: p1, len, token: str);
532	if (!p2) continue;
533
534	/ decrease the counts for this symbol's tokens /
535	forget_symbol(symbol: table[i]->sym, len);
536
537	size = len;
538
539	do {
540	*p2 = idx;
541	p2++;
542	size -= (p2 - p1);
543	memmove(dest: p2, src: p2 + `1`, n: size);
544	p1 = p2;
545	len--;
546
547	if (size < `2`) break;
548
549	/ find the token on the symbol /
550	p2 = find_token(str: p1, len: size, token: str);
551
552	} while (p2);
553
554	table[i]->len = len;
555
556	/ increase the counts for this symbol's new tokens /
557	learn_symbol(symbol: table[i]->sym, len);
558	}
559	}
560
561	/ search the token with the maximum profit /
562	static int find_best_token(void)
563	{
564	int i, best, bestprofit;
565
566	bestprofit=-`10000`;
567	best = `0`;
568
569	for (i = `0`; i < `0x10000`; i++) {
570	if (token_profit[i] > bestprofit) {
571	best = i;
572	bestprofit = token_profit[i];
573	}
574	}
575	return best;
576	}
577
578	/ this is the core of the algorithm: calculate the "best" table /
579	static void optimize_result(void)
580	{
581	int i, best;
582
583	/ using the '\0' symbol last allows compress_symbols to use standard*
584	* fast string functions */
585	for (i = `255`; i >= `0`; i--) {
586
587	/ if this table slot is empty (it is not used by an actual*
588	* original char code */
589	if (!best_table_len[i]) {
590
591	/ find the token with the best profit value /
592	best = find_best_token();
593	if (token_profit[best] == `0`)
594	break;
595
596	/ place it in the "best" table /
597	best_table_len[i] = `2`;
598	best_table[i][`0`] = best & `0xFF`;
599	best_table[i][`1`] = (best >> `8`) & `0xFF`;
600
601	/ replace this token in all the valid symbols /
602	compress_symbols(str: best_table[i], idx: i);
603	}
604	}
605	}
606
607	/ start by placing the symbols that are actually used on the table /
608	static void insert_real_symbols_in_table(void)
609	{
610	unsigned int i, j, c;
611
612	for (i = `0`; i < table_cnt; i++) {
613	for (j = `0`; j < table[i]->len; j++) {
614	c = table[i]->sym[j];
615	best_table[c][`0`]=c;
616	best_table_len[c]=`1`;
617	}
618	}
619	}
620
621	static void optimize_token_table(void)
622	{
623	build_initial_token_table();
624
625	insert_real_symbols_in_table();
626
627	optimize_result();
628	}
629
630	/ guess for "linker script provide" symbol /
631	static int may_be_linker_script_provide_symbol(const struct sym_entry *se)
632	{
633	const char *symbol = sym_name(s: se);
634	int len = se->len - `1`;
635
636	if (len < `8`)
637	return `0`;
638
639	if (symbol[`0`] != `'_'` \|\| symbol[`1`] != `'_'`)
640	return `0`;
641
642	/ __start_XXXXX /
643	if (!memcmp(s1: symbol + `2`, s2: "start_", n: `6`))
644	return `1`;
645
646	/ __stop_XXXXX /
647	if (!memcmp(s1: symbol + `2`, s2: "stop_", n: `5`))
648	return `1`;
649
650	/ __end_XXXXX /
651	if (!memcmp(s1: symbol + `2`, s2: "end_", n: `4`))
652	return `1`;
653
654	/ __XXXXX_start /
655	if (!memcmp(s1: symbol + len - `6`, s2: "_start", n: `6`))
656	return `1`;
657
658	/ __XXXXX_end /
659	if (!memcmp(s1: symbol + len - `4`, s2: "_end", n: `4`))
660	return `1`;
661
662	return `0`;
663	}
664
665	static int compare_symbols(const void a, const* void *b)
666	{
667	const struct sym_entry sa = (const struct sym_entry **)a;
668	const struct sym_entry sb = (const struct sym_entry **)b;
669	int wa, wb;
670
671	/ sort by address first /
672	if (sa->addr > sb->addr)
673	return `1`;
674	if (sa->addr < sb->addr)
675	return -`1`;
676
677	/ sort by "weakness" type /
678	wa = (sa->sym[`0`] == `'w'`) \|\| (sa->sym[`0`] == `'W'`);
679	wb = (sb->sym[`0`] == `'w'`) \|\| (sb->sym[`0`] == `'W'`);
680	if (wa != wb)
681	return wa - wb;
682
683	/ sort by "linker script provide" type /
684	wa = may_be_linker_script_provide_symbol(se: sa);
685	wb = may_be_linker_script_provide_symbol(se: sb);
686	if (wa != wb)
687	return wa - wb;
688
689	/ sort by the number of prefix underscores /
690	wa = strspn(s: sym_name(s: sa), accept: "_");
691	wb = strspn(s: sym_name(s: sb), accept: "_");
692	if (wa != wb)
693	return wa - wb;
694
695	/ sort by initial order, so that other symbols are left undisturbed /
696	return sa->seq - sb->seq;
697	}
698
699	static void sort_symbols(void)
700	{
701	qsort(base: table, nmemb: table_cnt, size: sizeof(table[`0`]), compar: compare_symbols);
702	}
703
704	/ find the minimum non-absolute symbol address /
705	static void record_relative_base(void)
706	{
707	/*
708	* The table is sorted by address.
709	* Take the first symbol value.
710	*/
711	if (table_cnt)
712	relative_base = table[`0`]->addr;
713	}
714
715	int main(int argc, char **argv)
716	{
717	while (`1`) {
718	static const struct option long_options[] = {
719	{.name: "all-symbols", no_argument, .flag: &all_symbols, .val: `1`},
720	{},
721	};
722
723	int c = getopt_long(argc: argc, argv: argv, shortopts: "", longopts: long_options, NULL);
724
725	if (c == -`1`)
726	break;
727	if (c != `0`)
728	usage();
729	}
730
731	if (optind >= argc)
732	usage();
733
734	read_map(in: argv[optind]);
735	shrink_table();
736	sort_symbols();
737	record_relative_base();
738	optimize_token_table();
739	write_src();
740
741	return `0`;
742	}
743

source code of linux/scripts/kallsyms.c