tcc-stupidos/libtcc/tccelf.c
2025-02-11 13:07:04 +01:00

2845 lines
87 KiB
C

/*
* ELF file handling for TCC
*
* Copyright (c) 2001-2004 Fabrice Bellard
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif /* HAVE_CONFIG_H */
#include <tcc.h>
#include <tcc/path.h>
#include <tcc/memory.h>
#include "utils/string.h"
/* Define this to get some debug output during relocation processing. */
#undef DEBUG_RELOC
/********************************************************/
/* global variables */
/* elf version information */
struct sym_version {
char *lib;
char *version;
int out_index;
int prev_same_lib;
};
#define nb_sym_versions s1->nb_sym_versions
#define sym_versions s1->sym_versions
#define nb_sym_to_version s1->nb_sym_to_version
#define sym_to_version s1->sym_to_version
#define dt_verneednum s1->dt_verneednum
#define versym_section s1->versym_section
#define verneed_section s1->verneed_section
/* special flag to indicate that the section should not be linked to the other ones */
#define SHF_PRIVATE 0x80000000
/* section is dynsymtab_section */
#define SHF_DYNSYM 0x40000000
#ifdef TCC_TARGET_PE
#define shf_RELRO SHF_ALLOC
static const char rdata[] = ".rdata";
#else
#define shf_RELRO SHF_ALLOC /* eventually made SHF_WRITE in sort_sections() */
static const char rdata[] = ".data.ro";
#endif
/* ------------------------------------------------------------------------- */
void tccelf_new(TCCState *s)
{
TCCState *s1 = s;
/* no section zero */
dynarray_add(&s->sections, &s->nb_sections, NULL);
/* create standard sections */
text_section = new_section(s, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR);
data_section = new_section(s, ".data", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE);
/* create ro data section (make ro after relocation done with GNU_RELRO) */
rodata_section = new_section(s, rdata, SHT_PROGBITS, shf_RELRO);
bss_section = new_section(s, ".bss", SHT_NOBITS, SHF_ALLOC | SHF_WRITE);
common_section = new_section(s, ".common", SHT_NOBITS, SHF_PRIVATE);
common_section->sh_num = SHN_COMMON;
/* symbols are always generated for linking stage */
symtab_section = new_symtab(s, ".symtab", SHT_SYMTAB, 0,
".strtab",
".hashtab", SHF_PRIVATE);
get_sym_attr(s, 0, 1);
#ifdef TCC_TARGET_PE
/* to make sure that -ltcc1 -Wl,-e,_start will grab the startup code
from libtcc1.a (unless _start defined) */
if (s->elf_entryname)
set_global_sym(s, s->elf_entryname, NULL, 0); /* SHN_UNDEF */
#endif
}
void free_section(Section *s)
{
if (!s)
return;
tcc_free(s->data);
s->data = NULL;
s->data_allocated = s->data_offset = 0;
}
void tccelf_delete(TCCState *s1)
{
int i;
#ifndef ELF_OBJ_ONLY
/* free symbol versions */
for (i = 0; i < nb_sym_versions; i++) {
tcc_free(sym_versions[i].version);
tcc_free(sym_versions[i].lib);
}
tcc_free(sym_versions);
tcc_free(sym_to_version);
#endif
/* free all sections */
for(i = 1; i < s1->nb_sections; i++)
free_section(s1->sections[i]);
dynarray_reset(&s1->sections, &s1->nb_sections);
for(i = 0; i < s1->nb_priv_sections; i++)
free_section(s1->priv_sections[i]);
dynarray_reset(&s1->priv_sections, &s1->nb_priv_sections);
tcc_free(s1->sym_attrs);
symtab_section = NULL; /* for tccrun.c:rt_printline() */
}
/* save section data state */
void tccelf_begin_file(TCCState *s1)
{
Section *s; int i;
for (i = 1; i < s1->nb_sections; i++) {
s = s1->sections[i];
s->sh_offset = s->data_offset;
}
/* disable symbol hashing during compilation */
s = s1->symtab, s->reloc = s->hash, s->hash = NULL;
}
static void update_relocs(TCCState *s1, Section *s, int *old_to_new_syms, int first_sym);
/* At the end of compilation, convert any UNDEF syms to global, and merge
with previously existing symbols */
void tccelf_end_file(TCCState *s1)
{
Section *s = s1->symtab;
int first_sym, nb_syms, *tr, i;
first_sym = s->sh_offset / sizeof (ElfSym);
nb_syms = s->data_offset / sizeof (ElfSym) - first_sym;
s->data_offset = s->sh_offset;
s->link->data_offset = s->link->sh_offset;
s->hash = s->reloc, s->reloc = NULL;
tr = tcc_mallocz(nb_syms * sizeof *tr);
for (i = 0; i < nb_syms; ++i) {
ElfSym *sym = (ElfSym*)s->data + first_sym + i;
if (sym->st_shndx == SHN_UNDEF) {
int sym_bind = ELFW(ST_BIND)(sym->st_info);
int sym_type = ELFW(ST_TYPE)(sym->st_info);
if (sym_bind == STB_LOCAL)
sym_bind = STB_GLOBAL;
#ifndef TCC_TARGET_PE
if (sym_bind == STB_GLOBAL && s1->output_type == TCC_OUTPUT_OBJ) {
/* undefined symbols with STT_FUNC are confusing gnu ld when
linking statically to STT_GNU_IFUNC */
sym_type = STT_NOTYPE;
}
#endif
sym->st_info = ELFW(ST_INFO)(sym_bind, sym_type);
}
tr[i] = set_elf_sym(s, sym->st_value, sym->st_size, sym->st_info,
sym->st_other, sym->st_shndx, (char*)s->link->data + sym->st_name);
}
/* now update relocations */
update_relocs(s1, s, tr, first_sym);
tcc_free(tr);
/* record text/data/bss output for -bench info */
for (i = 0; i < 4; ++i) {
s = s1->sections[i + 1];
s1->total_output[i] += s->data_offset - s->sh_offset;
}
}
Section *new_section(TCCState *s1, const char *name, int sh_type, int sh_flags)
{
Section *sec;
sec = tcc_mallocz(sizeof(Section) + strlen(name));
sec->s1 = s1;
strcpy(sec->name, name);
sec->sh_type = sh_type;
sec->sh_flags = sh_flags;
switch(sh_type) {
case SHT_GNU_versym:
sec->sh_addralign = 2;
break;
case SHT_HASH:
case SHT_GNU_HASH:
case SHT_REL:
case SHT_RELA:
case SHT_DYNSYM:
case SHT_SYMTAB:
case SHT_DYNAMIC:
case SHT_GNU_verneed:
case SHT_GNU_verdef:
sec->sh_addralign = PTR_SIZE;
break;
case SHT_STRTAB:
sec->sh_addralign = 1;
break;
default:
sec->sh_addralign = PTR_SIZE; /* gcc/pcc default alignment */
break;
}
if (sh_flags & SHF_PRIVATE) {
dynarray_add(&s1->priv_sections, &s1->nb_priv_sections, sec);
} else {
sec->sh_num = s1->nb_sections;
dynarray_add(&s1->sections, &s1->nb_sections, sec);
}
return sec;
}
void init_symtab(Section *s)
{
int *ptr, nb_buckets = 1;
put_elf_str(s->link, "");
section_ptr_add(s, sizeof (ElfW(Sym)));
ptr = section_ptr_add(s->hash, (2 + nb_buckets + 1) * sizeof(int));
ptr[0] = nb_buckets;
ptr[1] = 1;
memset(ptr + 2, 0, (nb_buckets + 1) * sizeof(int));
}
Section *new_symtab(TCCState *s1,
const char *symtab_name, int sh_type, int sh_flags,
const char *strtab_name,
const char *hash_name, int hash_sh_flags)
{
Section *symtab, *strtab, *hash;
symtab = new_section(s1, symtab_name, sh_type, sh_flags);
symtab->sh_entsize = sizeof(ElfW(Sym));
strtab = new_section(s1, strtab_name, SHT_STRTAB, sh_flags);
symtab->link = strtab;
hash = new_section(s1, hash_name, SHT_HASH, hash_sh_flags);
hash->sh_entsize = sizeof(int);
symtab->hash = hash;
hash->link = symtab;
init_symtab(symtab);
return symtab;
}
/* realloc section and set its content to zero */
void section_realloc(Section *sec, unsigned long new_size)
{
unsigned long size;
unsigned char *data;
size = sec->data_allocated;
if (size == 0)
size = 1;
while (size < new_size)
size = size * 2;
data = tcc_realloc(sec->data, size);
memset(data + sec->data_allocated, 0, size - sec->data_allocated);
sec->data = data;
sec->data_allocated = size;
}
/* reserve at least 'size' bytes aligned per 'align' in section
'sec' from current offset, and return the aligned offset */
size_t section_add(Section *sec, addr_t size, int align)
{
size_t offset, offset1;
offset = (sec->data_offset + align - 1) & -align;
offset1 = offset + size;
if (sec->sh_type != SHT_NOBITS && offset1 > sec->data_allocated)
section_realloc(sec, offset1);
sec->data_offset = offset1;
if (align > sec->sh_addralign)
sec->sh_addralign = align;
return offset;
}
/* reserve at least 'size' bytes in section 'sec' from
sec->data_offset. */
void *section_ptr_add(Section *sec, addr_t size)
{
size_t offset = section_add(sec, size, 1);
return sec->data + offset;
}
#ifndef ELF_OBJ_ONLY
/* reserve at least 'size' bytes from section start */
static void section_reserve(Section *sec, unsigned long size)
{
if (size > sec->data_allocated)
section_realloc(sec, size);
if (size > sec->data_offset)
sec->data_offset = size;
}
#endif
static Section *have_section(TCCState *s1, const char *name)
{
Section *sec;
int i;
for(i = 1; i < s1->nb_sections; i++) {
sec = s1->sections[i];
if (!strcmp(name, sec->name))
return sec;
}
return NULL;
}
/* return a reference to a section, and create it if it does not
exists */
Section *find_section(TCCState *s1, const char *name)
{
Section *sec = have_section(s1, name);
if (sec)
return sec;
/* sections are created as PROGBITS */
return new_section(s1, name, SHT_PROGBITS, SHF_ALLOC);
}
/* ------------------------------------------------------------------------- */
int put_elf_str(Section *s, const char *sym)
{
int offset, len;
char *ptr;
len = strlen(sym) + 1;
offset = s->data_offset;
ptr = section_ptr_add(s, len);
memmove(ptr, sym, len);
return offset;
}
/* elf symbol hashing function */
static ElfW(Word) elf_hash(const unsigned char *name)
{
ElfW(Word) h = 0, g;
while (*name) {
h = (h << 4) + *name++;
g = h & 0xf0000000;
if (g)
h ^= g >> 24;
h &= ~g;
}
return h;
}
/* rebuild hash table of section s */
/* NOTE: we do factorize the hash table code to go faster */
static void rebuild_hash(Section *s, unsigned int nb_buckets)
{
ElfW(Sym) *sym;
int *ptr, *hash, nb_syms, sym_index, h;
unsigned char *strtab;
strtab = s->link->data;
nb_syms = s->data_offset / sizeof(ElfW(Sym));
if (!nb_buckets)
nb_buckets = ((int*)s->hash->data)[0];
s->hash->data_offset = 0;
ptr = section_ptr_add(s->hash, (2 + nb_buckets + nb_syms) * sizeof(int));
ptr[0] = nb_buckets;
ptr[1] = nb_syms;
ptr += 2;
hash = ptr;
memset(hash, 0, (nb_buckets + 1) * sizeof(int));
ptr += nb_buckets + 1;
sym = (ElfW(Sym) *)s->data + 1;
for(sym_index = 1; sym_index < nb_syms; sym_index++) {
if (ELFW(ST_BIND)(sym->st_info) != STB_LOCAL) {
h = elf_hash(strtab + sym->st_name) % nb_buckets;
*ptr = hash[h];
hash[h] = sym_index;
} else {
*ptr = 0;
}
ptr++;
sym++;
}
}
/* return the symbol number */
int put_elf_sym(Section *s, addr_t value, unsigned long size,
int info, int other, int shndx, const char *name)
{
int name_offset, sym_index;
int nbuckets, h;
ElfW(Sym) *sym;
Section *hs;
sym = section_ptr_add(s, sizeof(ElfW(Sym)));
if (name && name[0])
name_offset = put_elf_str(s->link, name);
else
name_offset = 0;
/* XXX: endianness */
sym->st_name = name_offset;
sym->st_value = value;
sym->st_size = size;
sym->st_info = info;
sym->st_other = other;
sym->st_shndx = shndx;
sym_index = sym - (ElfW(Sym) *)s->data;
hs = s->hash;
if (hs) {
int *ptr, *base;
ptr = section_ptr_add(hs, sizeof(int));
base = (int *)hs->data;
/* only add global or weak symbols. */
if (ELFW(ST_BIND)(info) != STB_LOCAL) {
/* add another hashing entry */
nbuckets = base[0];
h = elf_hash((unsigned char *)s->link->data + name_offset) % nbuckets;
*ptr = base[2 + h];
base[2 + h] = sym_index;
base[1]++;
/* we resize the hash table */
hs->nb_hashed_syms++;
if (hs->nb_hashed_syms > 2 * nbuckets) {
rebuild_hash(s, 2 * nbuckets);
}
} else {
*ptr = 0;
base[1]++;
}
}
return sym_index;
}
int find_elf_sym(Section *s, const char *name)
{
ElfW(Sym) *sym;
Section *hs;
int nbuckets, sym_index, h;
const char *name1;
hs = s->hash;
if (!hs)
return 0;
nbuckets = ((int *)hs->data)[0];
h = elf_hash((unsigned char *) name) % nbuckets;
sym_index = ((int *)hs->data)[2 + h];
while (sym_index != 0) {
sym = &((ElfW(Sym) *)s->data)[sym_index];
name1 = (char *) s->link->data + sym->st_name;
if (!strcmp(name, name1))
return sym_index;
sym_index = ((int *)hs->data)[2 + nbuckets + sym_index];
}
return 0;
}
/* return elf symbol value, signal error if 'err' is nonzero, decorate
name if FORC */
addr_t get_sym_addr(TCCState *s1, const char *name, int err, int forc)
{
int sym_index;
ElfW(Sym) *sym;
char buf[256];
if (forc && s1->leading_underscore) {
buf[0] = '_';
pstrcpy(buf + 1, sizeof(buf) - 1, name);
name = buf;
}
sym_index = find_elf_sym(s1->symtab, name);
sym = &((ElfW(Sym) *)s1->symtab->data)[sym_index];
if (!sym_index || sym->st_shndx == SHN_UNDEF) {
if (err)
tcc_error_noabort("%s not defined", name);
return (addr_t)-1;
}
return sym->st_value;
}
/* return elf symbol value */
void *tcc_get_symbol(TCCState *s, const char *name)
{
addr_t addr = get_sym_addr(s, name, 0, 1);
return addr == -1 ? NULL : (void*)(uintptr_t)addr;
}
/* list elf symbol names and values */
void list_elf_symbols(TCCState *s, void *ctx,
void (*symbol_cb)(void *ctx, const char *name, const void *val))
{
ElfW(Sym) *sym;
Section *symtab;
int sym_index, end_sym;
const char *name;
unsigned char sym_vis, sym_bind;
symtab = s->symtab;
end_sym = symtab->data_offset / sizeof (ElfSym);
for (sym_index = 0; sym_index < end_sym; ++sym_index) {
sym = &((ElfW(Sym) *)symtab->data)[sym_index];
if (sym->st_value) {
name = (char *) symtab->link->data + sym->st_name;
sym_bind = ELFW(ST_BIND)(sym->st_info);
sym_vis = ELFW(ST_VISIBILITY)(sym->st_other);
if (sym_bind == STB_GLOBAL && sym_vis == STV_DEFAULT)
symbol_cb(ctx, name, (void*)(uintptr_t)sym->st_value);
}
}
}
/* list elf symbol names and values */
void tcc_list_symbols(TCCState *s, void *ctx,
void (*symbol_cb)(void *ctx, const char *name, const void *val))
{
list_elf_symbols(s, ctx, symbol_cb);
}
/* add an elf symbol : check if it is already defined and patch
it. Return symbol index. NOTE that sh_num can be SHN_UNDEF. */
int set_elf_sym(Section *s, addr_t value, unsigned long size,
int info, int other, int shndx, const char *name)
{
TCCState *s1 = s->s1;
ElfW(Sym) *esym;
int sym_bind, sym_index, sym_type, esym_bind;
unsigned char sym_vis, esym_vis, new_vis;
sym_bind = ELFW(ST_BIND)(info);
sym_type = ELFW(ST_TYPE)(info);
sym_vis = ELFW(ST_VISIBILITY)(other);
if (sym_bind != STB_LOCAL) {
/* we search global or weak symbols */
sym_index = find_elf_sym(s, name);
if (!sym_index)
goto do_def;
esym = &((ElfW(Sym) *)s->data)[sym_index];
if (esym->st_value == value && esym->st_size == size && esym->st_info == info
&& esym->st_other == other && esym->st_shndx == shndx)
return sym_index;
if (esym->st_shndx != SHN_UNDEF) {
esym_bind = ELFW(ST_BIND)(esym->st_info);
/* propagate the most constraining visibility */
/* STV_DEFAULT(0)<STV_PROTECTED(3)<STV_HIDDEN(2)<STV_INTERNAL(1) */
esym_vis = ELFW(ST_VISIBILITY)(esym->st_other);
if (esym_vis == STV_DEFAULT) {
new_vis = sym_vis;
} else if (sym_vis == STV_DEFAULT) {
new_vis = esym_vis;
} else {
new_vis = (esym_vis < sym_vis) ? esym_vis : sym_vis;
}
esym->st_other = (esym->st_other & ~ELFW(ST_VISIBILITY)(-1))
| new_vis;
if (shndx == SHN_UNDEF) {
/* ignore adding of undefined symbol if the
corresponding symbol is already defined */
} else if (sym_bind == STB_GLOBAL && esym_bind == STB_WEAK) {
/* global overrides weak, so patch */
goto do_patch;
} else if (sym_bind == STB_WEAK && esym_bind == STB_GLOBAL) {
/* weak is ignored if already global */
} else if (sym_bind == STB_WEAK && esym_bind == STB_WEAK) {
/* keep first-found weak definition, ignore subsequents */
} else if (sym_vis == STV_HIDDEN || sym_vis == STV_INTERNAL) {
/* ignore hidden symbols after */
} else if ((esym->st_shndx == SHN_COMMON
|| esym->st_shndx == bss_section->sh_num)
&& (shndx < SHN_LORESERVE
&& shndx != bss_section->sh_num)) {
/* data symbol gets precedence over common/bss */
goto do_patch;
} else if (shndx == SHN_COMMON || shndx == bss_section->sh_num) {
/* data symbol keeps precedence over common/bss */
} else if (s->sh_flags & SHF_DYNSYM) {
/* we accept that two DLL define the same symbol */
} else if (esym->st_other & ST_ASM_SET) {
/* If the existing symbol came from an asm .set
we can override. */
goto do_patch;
} else {
#if 0
printf("new_bind=%x new_shndx=%x new_vis=%x old_bind=%x old_shndx=%x old_vis=%x\n",
sym_bind, shndx, new_vis, esym_bind, esym->st_shndx, esym_vis);
#endif
tcc_error_noabort("'%s' defined twice", name);
}
} else {
esym->st_other = other;
do_patch:
esym->st_info = ELFW(ST_INFO)(sym_bind, sym_type);
esym->st_shndx = shndx;
s1->new_undef_sym = 1;
esym->st_value = value;
esym->st_size = size;
}
} else {
do_def:
sym_index = put_elf_sym(s, value, size,
ELFW(ST_INFO)(sym_bind, sym_type), other,
shndx, name);
}
return sym_index;
}
/* put relocation */
void put_elf_reloca(Section *symtab, Section *s, unsigned long offset,
int type, int symbol, addr_t addend)
{
TCCState *s1 = s->s1;
char buf[256];
Section *sr;
ElfW_Rel *rel;
sr = s->reloc;
if (!sr) {
/* if no relocation section, create it */
snprintf(buf, sizeof(buf), REL_SECTION_FMT, s->name);
/* if the symtab is allocated, then we consider the relocation
are also */
sr = new_section(s->s1, buf, SHT_RELX, symtab->sh_flags);
sr->sh_entsize = sizeof(ElfW_Rel);
sr->link = symtab;
sr->sh_info = s->sh_num;
s->reloc = sr;
}
rel = section_ptr_add(sr, sizeof(ElfW_Rel));
rel->r_offset = offset;
rel->r_info = ELFW(R_INFO)(symbol, type);
#if SHT_RELX == SHT_RELA
rel->r_addend = addend;
#endif
if (SHT_RELX != SHT_RELA && addend)
tcc_error_noabort("non-zero addend on REL architecture");
}
void put_elf_reloc(Section *symtab, Section *s, unsigned long offset,
int type, int symbol)
{
put_elf_reloca(symtab, s, offset, type, symbol, 0);
}
struct sym_attr *get_sym_attr(TCCState *s1, int index, int alloc)
{
int n;
struct sym_attr *tab;
if (index >= s1->nb_sym_attrs) {
if (!alloc)
return s1->sym_attrs;
/* find immediately bigger power of 2 and reallocate array */
n = 1;
while (index >= n)
n *= 2;
tab = tcc_realloc(s1->sym_attrs, n * sizeof(*s1->sym_attrs));
s1->sym_attrs = tab;
memset(s1->sym_attrs + s1->nb_sym_attrs, 0,
(n - s1->nb_sym_attrs) * sizeof(*s1->sym_attrs));
s1->nb_sym_attrs = n;
}
return &s1->sym_attrs[index];
}
static void update_relocs(TCCState *s1, Section *s, int *old_to_new_syms, int first_sym)
{
int i, type, sym_index;
Section *sr;
ElfW_Rel *rel;
for(i = 1; i < s1->nb_sections; i++) {
sr = s1->sections[i];
if (sr->sh_type == SHT_RELX && sr->link == s) {
for_each_elem(sr, 0, rel, ElfW_Rel) {
sym_index = ELFW(R_SYM)(rel->r_info);
type = ELFW(R_TYPE)(rel->r_info);
if ((sym_index -= first_sym) < 0)
continue; /* zero sym_index in reloc (can happen with asm) */
sym_index = old_to_new_syms[sym_index];
rel->r_info = ELFW(R_INFO)(sym_index, type);
}
}
}
}
/* In an ELF file symbol table, the local symbols must appear below
the global and weak ones. Since TCC cannot sort it while generating
the code, we must do it after. All the relocation tables are also
modified to take into account the symbol table sorting */
static void sort_syms(TCCState *s1, Section *s)
{
int *old_to_new_syms;
ElfW(Sym) *new_syms;
int nb_syms, i;
ElfW(Sym) *p, *q;
nb_syms = s->data_offset / sizeof(ElfW(Sym));
new_syms = tcc_malloc(nb_syms * sizeof(ElfW(Sym)));
old_to_new_syms = tcc_malloc(nb_syms * sizeof(int));
/* first pass for local symbols */
p = (ElfW(Sym) *)s->data;
q = new_syms;
for(i = 0; i < nb_syms; i++) {
if (ELFW(ST_BIND)(p->st_info) == STB_LOCAL) {
old_to_new_syms[i] = q - new_syms;
*q++ = *p;
}
p++;
}
/* save the number of local symbols in section header */
if( s->sh_size ) /* this 'if' makes IDA happy */
s->sh_info = q - new_syms;
/* then second pass for non local symbols */
p = (ElfW(Sym) *)s->data;
for(i = 0; i < nb_syms; i++) {
if (ELFW(ST_BIND)(p->st_info) != STB_LOCAL) {
old_to_new_syms[i] = q - new_syms;
*q++ = *p;
}
p++;
}
/* we copy the new symbols to the old */
memcpy(s->data, new_syms, nb_syms * sizeof(ElfW(Sym)));
tcc_free(new_syms);
update_relocs(s1, s, old_to_new_syms, 0);
tcc_free(old_to_new_syms);
}
#ifndef ELF_OBJ_ONLY
/* See: https://flapenguin.me/elf-dt-gnu-hash */
#define ELFCLASS_BITS (PTR_SIZE * 8)
static Elf32_Word elf_gnu_hash (const unsigned char *name)
{
Elf32_Word h = 5381;
unsigned char c;
while ((c = *name++))
h = h * 33 + c;
return h;
}
static void update_gnu_hash(TCCState *s1, Section *gnu_hash)
{
int *old_to_new_syms;
ElfW(Sym) *new_syms;
int nb_syms, i, nbuckets, bloom_size, bloom_shift;
ElfW(Sym) *p, *q;
Section *vs;
Section *dynsym = s1->dynsym;
Elf32_Word *ptr, *buckets, *chain, *hash;
unsigned int *nextbuck;
addr_t *bloom;
unsigned char *strtab;
struct { int first, last; } *buck;
strtab = dynsym->link->data;
nb_syms = dynsym->data_offset / sizeof(ElfW(Sym));
new_syms = tcc_malloc(nb_syms * sizeof(ElfW(Sym)));
old_to_new_syms = tcc_malloc(nb_syms * sizeof(int));
hash = tcc_malloc(nb_syms * sizeof(Elf32_Word));
nextbuck = tcc_malloc(nb_syms * sizeof(int));
/* calculate hashes and copy undefs */
p = (ElfW(Sym) *)dynsym->data;
q = new_syms;
for(i = 0; i < nb_syms; i++, p++) {
if (p->st_shndx == SHN_UNDEF) {
old_to_new_syms[i] = q - new_syms;
*q++ = *p;
}
else
hash[i] = elf_gnu_hash(strtab + p->st_name);
}
ptr = (Elf32_Word *) gnu_hash->data;
nbuckets = ptr[0];
bloom_size = ptr[2];
bloom_shift = ptr[3];
bloom = (addr_t *) (void *) &ptr[4];
buckets = (Elf32_Word*) (void *) &bloom[bloom_size];
chain = &buckets[nbuckets];
buck = tcc_malloc(nbuckets * sizeof(*buck));
if (gnu_hash->data_offset != 4 * 4 +
PTR_SIZE * bloom_size +
nbuckets * 4 +
(nb_syms - (q - new_syms)) * 4)
tcc_error_noabort ("gnu_hash size incorrect");
/* find buckets */
for(i = 0; i < nbuckets; i++)
buck[i].first = -1;
p = (ElfW(Sym) *)dynsym->data;
for(i = 0; i < nb_syms; i++, p++)
if (p->st_shndx != SHN_UNDEF) {
int bucket = hash[i] % nbuckets;
if (buck[bucket].first == -1)
buck[bucket].first = buck[bucket].last = i;
else {
nextbuck[buck[bucket].last] = i;
buck[bucket].last = i;
}
}
/* fill buckets/chains/bloom and sort symbols */
p = (ElfW(Sym) *)dynsym->data;
for(i = 0; i < nbuckets; i++) {
int cur = buck[i].first;
if (cur != -1) {
buckets[i] = q - new_syms;
for (;;) {
old_to_new_syms[cur] = q - new_syms;
*q++ = p[cur];
*chain++ = hash[cur] & ~1;
bloom[(hash[cur] / ELFCLASS_BITS) % bloom_size] |=
(addr_t)1 << (hash[cur] % ELFCLASS_BITS) |
(addr_t)1 << ((hash[cur] >> bloom_shift) % ELFCLASS_BITS);
if (cur == buck[i].last)
break;
cur = nextbuck[cur];
}
chain[-1] |= 1;
}
}
memcpy(dynsym->data, new_syms, nb_syms * sizeof(ElfW(Sym)));
tcc_free(new_syms);
tcc_free(hash);
tcc_free(buck);
tcc_free(nextbuck);
update_relocs(s1, dynsym, old_to_new_syms, 0);
/* modify the versions */
vs = versym_section;
if (vs) {
ElfW(Half) *newver, *versym = (ElfW(Half) *)vs->data;
if (1/*versym*/) {
newver = tcc_malloc(nb_syms * sizeof(*newver));
for (i = 0; i < nb_syms; i++)
newver[old_to_new_syms[i]] = versym[i];
memcpy(vs->data, newver, nb_syms * sizeof(*newver));
tcc_free(newver);
}
}
tcc_free(old_to_new_syms);
/* rebuild hash */
ptr = (Elf32_Word *) dynsym->hash->data;
rebuild_hash(dynsym, ptr[0]);
}
#endif /* ELF_OBJ_ONLY */
/* relocate symbol table, resolve undefined symbols if do_resolve is
true and output error if undefined symbol. */
void relocate_syms(TCCState *s1, Section *symtab, int do_resolve)
{
ElfW(Sym) *sym;
int sym_bind, sh_num;
const char *name;
for_each_elem(symtab, 1, sym, ElfW(Sym)) {
sh_num = sym->st_shndx;
if (sh_num == SHN_UNDEF) {
if (do_resolve == 2) /* relocating dynsym */
continue;
name = (char *) s1->symtab->link->data + sym->st_name;
/* Use ld.so to resolve symbol for us (for tcc -run) */
if (do_resolve) {
/* if dynamic symbol exist, it will be used in relocate_section */
} else if (s1->dynsym && find_elf_sym(s1->dynsym, name))
goto found;
/* XXX: _fp_hw seems to be part of the ABI, so we ignore
it */
if (!strcmp(name, "_fp_hw"))
goto found;
/* only weak symbols are accepted to be undefined. Their
value is zero */
sym_bind = ELFW(ST_BIND)(sym->st_info);
if (sym_bind == STB_WEAK)
sym->st_value = 0;
else
tcc_error_noabort("undefined symbol '%s'", name);
} else if (sh_num < SHN_LORESERVE) {
/* add section base */
sym->st_value += s1->sections[sym->st_shndx]->sh_addr;
}
found: ;
}
}
/* relocate a given section (CPU dependent) by applying the relocations
in the associated relocation section */
static void relocate_section(TCCState *s1, Section *s, Section *sr)
{
ElfW_Rel *rel;
ElfW(Sym) *sym;
int type, sym_index;
unsigned char *ptr;
addr_t tgt, addr;
qrel = (ElfW_Rel *)sr->data;
for_each_elem(sr, 0, rel, ElfW_Rel) {
ptr = s->data + rel->r_offset;
sym_index = ELFW(R_SYM)(rel->r_info);
sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
type = ELFW(R_TYPE)(rel->r_info);
tgt = sym->st_value;
#if SHT_RELX == SHT_RELA
tgt += rel->r_addend;
#endif
addr = s->sh_addr + rel->r_offset;
relocate(s1, rel, type, ptr, addr, tgt);
}
#ifndef ELF_OBJ_ONLY
/* if the relocation is allocated, we change its symbol table */
if (sr->sh_flags & SHF_ALLOC) {
sr->link = s1->dynsym;
}
#endif
}
/* relocate all sections */
void relocate_sections(TCCState *s1)
{
int i;
Section *s, *sr;
for (i = 1; i < s1->nb_sections; ++i) {
sr = s1->sections[i];
if (sr->sh_type != SHT_RELX)
continue;
s = s1->sections[sr->sh_info];
relocate_section(s1, s, sr);
#ifndef ELF_OBJ_ONLY
if (sr->sh_flags & SHF_ALLOC) {
ElfW_Rel *rel;
/* relocate relocation table in 'sr' */
for_each_elem(sr, 0, rel, ElfW_Rel)
rel->r_offset += s->sh_addr;
}
#endif
}
}
#ifdef NEED_BUILD_GOT
static int build_got(TCCState *s1)
{
/* if no got, then create it */
s1->got = new_section(s1, ".got", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE);
s1->got->sh_entsize = 4;
/* keep space for _DYNAMIC pointer and two dummy got entries */
section_ptr_add(s1->got, 3 * PTR_SIZE);
return set_elf_sym(symtab_section, 0, 0, ELFW(ST_INFO)(STB_GLOBAL, STT_OBJECT),
0, s1->got->sh_num, "_GLOBAL_OFFSET_TABLE_");
}
/* Create a GOT and (for function call) a PLT entry corresponding to a symbol
in s1->symtab. When creating the dynamic symbol table entry for the GOT
relocation, use 'size' and 'info' for the corresponding symbol metadata.
Returns the offset of the GOT or (if any) PLT entry. */
static struct sym_attr * put_got_entry(TCCState *s1, int dyn_reloc_type,
int sym_index)
{
int need_plt_entry;
const char *name;
ElfW(Sym) *sym;
struct sym_attr *attr;
unsigned got_offset;
char plt_name[200];
int len;
Section *s_rel;
need_plt_entry = (dyn_reloc_type == R_JMP_SLOT);
attr = get_sym_attr(s1, sym_index, 1);
/* In case a function is both called and its address taken 2 GOT entries
are created, one for taking the address (GOT) and the other for the PLT
entry (PLTGOT). */
if (need_plt_entry ? attr->plt_offset : attr->got_offset)
return attr;
s_rel = s1->got;
if (need_plt_entry) {
if (!s1->plt) {
s1->plt = new_section(s1, ".plt", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR);
s1->plt->sh_entsize = 4;
}
s_rel = s1->plt;
}
/* create the GOT entry */
got_offset = s1->got->data_offset;
section_ptr_add(s1->got, PTR_SIZE);
/* Create the GOT relocation that will insert the address of the object or
function of interest in the GOT entry. This is a static relocation for
memory output (dlsym will give us the address of symbols) and dynamic
relocation otherwise (executable and DLLs). The relocation should be
done lazily for GOT entry with *_JUMP_SLOT relocation type (the one
associated to a PLT entry) but is currently done at load time for an
unknown reason. */
sym = &((ElfW(Sym) *) symtab_section->data)[sym_index];
name = (char *) symtab_section->link->data + sym->st_name;
//printf("sym %d %s\n", need_plt_entry, name);
if (s1->dynsym) {
if (ELFW(ST_BIND)(sym->st_info) == STB_LOCAL) {
/* Hack alarm. We don't want to emit dynamic symbols
and symbol based relocs for STB_LOCAL symbols, but rather
want to resolve them directly. At this point the symbol
values aren't final yet, so we must defer this. We will later
have to create a RELATIVE reloc anyway, so we misuse the
relocation slot to smuggle the symbol reference until
fill_local_got_entries. Not that the sym_index is
relative to symtab_section, not s1->dynsym! Nevertheless
we use s1->dyn_sym so that if this is the first call
that got->reloc is correctly created. Also note that
RELATIVE relocs are not normally created for the .got,
so the types serves as a marker for later (and is retained
also for the final output, which is okay because then the
got is just normal data). */
put_elf_reloc(s1->dynsym, s1->got, got_offset, R_RELATIVE,
sym_index);
} else {
if (0 == attr->dyn_index)
attr->dyn_index = set_elf_sym(s1->dynsym, sym->st_value,
sym->st_size, sym->st_info, 0,
sym->st_shndx, name);
put_elf_reloc(s1->dynsym, s_rel, got_offset, dyn_reloc_type,
attr->dyn_index);
}
} else {
put_elf_reloc(symtab_section, s1->got, got_offset, dyn_reloc_type,
sym_index);
}
if (need_plt_entry) {
attr->plt_offset = create_plt_entry(s1, got_offset, attr);
/* create a symbol 'sym@plt' for the PLT jump vector */
len = strlen(name);
if (len > sizeof plt_name - 5)
len = sizeof plt_name - 5;
memcpy(plt_name, name, len);
strcpy(plt_name + len, "@plt");
attr->plt_sym = put_elf_sym(s1->symtab, attr->plt_offset, 0,
ELFW(ST_INFO)(STB_GLOBAL, STT_FUNC), 0, s1->plt->sh_num, plt_name);
} else {
attr->got_offset = got_offset;
}
return attr;
}
/* build GOT and PLT entries */
/* Two passes because R_JMP_SLOT should become first. Some targets
(arm, arm64) do not allow mixing R_JMP_SLOT and R_GLOB_DAT. */
void build_got_entries(TCCState *s1, int got_sym)
{
Section *s;
ElfW_Rel *rel;
ElfW(Sym) *sym;
int i, type, gotplt_entry, reloc_type, sym_index;
struct sym_attr *attr;
int pass = 0;
redo:
for(i = 1; i < s1->nb_sections; i++) {
s = s1->sections[i];
if (s->sh_type != SHT_RELX)
continue;
/* no need to handle got relocations */
if (s->link != symtab_section)
continue;
for_each_elem(s, 0, rel, ElfW_Rel) {
type = ELFW(R_TYPE)(rel->r_info);
gotplt_entry = gotplt_entry_type(type);
if (gotplt_entry == -1) {
tcc_error_noabort ("Unknown relocation type for got: %d", type);
continue;
}
sym_index = ELFW(R_SYM)(rel->r_info);
sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
if (gotplt_entry == NO_GOTPLT_ENTRY) {
continue;
}
/* Automatically create PLT/GOT [entry] if it is an undefined
reference (resolved at runtime), or the symbol is absolute,
probably created by tcc_add_symbol, and thus on 64-bit
targets might be too far from application code. */
if (gotplt_entry == AUTO_GOTPLT_ENTRY) {
if (sym->st_shndx == SHN_UNDEF) {
ElfW(Sym) *esym;
int dynindex;
/* Relocations for UNDEF symbols would normally need
to be transferred into the executable or shared object.
If that were done AUTO_GOTPLT_ENTRY wouldn't exist.
But TCC doesn't do that (at least for exes), so we
need to resolve all such relocs locally. And that
means PLT slots for functions in DLLs and COPY relocs for
data symbols. COPY relocs were generated in
bind_exe_dynsyms (and the symbol adjusted to be defined),
and for functions we were generated a dynamic symbol
of function type. */
if (s1->dynsym) {
/* dynsym isn't set for -run :-/ */
dynindex = get_sym_attr(s1, sym_index, 0)->dyn_index;
esym = (ElfW(Sym) *)s1->dynsym->data + dynindex;
if (dynindex
&& (ELFW(ST_TYPE)(esym->st_info) == STT_FUNC
|| (ELFW(ST_TYPE)(esym->st_info) == STT_NOTYPE
&& ELFW(ST_TYPE)(sym->st_info) == STT_FUNC)))
goto jmp_slot;
}
} else if (sym->st_shndx == SHN_ABS) {
if (sym->st_value == 0) /* from tcc_add_btstub() */
continue;
if (PTR_SIZE != 8)
continue;
/* from tcc_add_symbol(): on 64 bit platforms these
need to go through .got */
} else
continue;
}
reloc_type = code_reloc(type);
if (reloc_type == -1) {
tcc_error_noabort ("Unknown relocation type: %d", type);
continue;
}
if (reloc_type != 0) {
jmp_slot:
if (pass != 0)
continue;
reloc_type = R_JMP_SLOT;
} else {
if (pass != 1)
continue;
reloc_type = R_GLOB_DAT;
}
if (!s1->got)
got_sym = build_got(s1);
if (gotplt_entry == BUILD_GOT_ONLY)
continue;
attr = put_got_entry(s1, reloc_type, sym_index);
if (reloc_type == R_JMP_SLOT)
rel->r_info = ELFW(R_INFO)(attr->plt_sym, type);
}
}
if (++pass < 2)
goto redo;
/* .rel.plt refers to .got actually */
if (s1->plt && s1->plt->reloc)
s1->plt->reloc->sh_info = s1->got->sh_num;
if (got_sym) /* set size */
((ElfW(Sym)*)symtab_section->data)[got_sym].st_size = s1->got->data_offset;
}
#endif /* def NEED_BUILD_GOT */
int set_global_sym(TCCState *s1, const char *name, Section *sec, addr_t offs)
{
int shn = sec ? sec->sh_num : offs || !name ? SHN_ABS : SHN_UNDEF;
if (sec && offs == -1)
offs = sec->data_offset;
return set_elf_sym(symtab_section, offs, 0,
ELFW(ST_INFO)(name ? STB_GLOBAL : STB_LOCAL, STT_NOTYPE), 0, shn, name);
}
static void add_init_array_defines(TCCState *s1, const char *section_name)
{
Section *s;
addr_t end_offset;
char buf[1024];
s = have_section(s1, section_name);
if (!s || !(s->sh_flags & SHF_ALLOC)) {
end_offset = 0;
s = text_section;
} else {
end_offset = s->data_offset;
}
snprintf(buf, sizeof(buf), "__%s_start", section_name + 1);
set_global_sym(s1, buf, s, 0);
snprintf(buf, sizeof(buf), "__%s_end", section_name + 1);
set_global_sym(s1, buf, s, end_offset);
}
void add_array (TCCState *s1, const char *sec, int c)
{
Section *s;
s = find_section(s1, sec);
s->sh_flags = shf_RELRO;
s->sh_type = sec[1] == 'i' ? SHT_INIT_ARRAY : SHT_FINI_ARRAY;
put_elf_reloc (s1->symtab, s, s->data_offset, R_DATA_PTR, c);
section_ptr_add(s, PTR_SIZE);
}
#if !defined TCC_TARGET_PE
/* add libc crt1/crti objects */
void tccelf_add_crtbegin(TCCState *s1)
{
tcc_add_crt(s1, "crt1.o");
tcc_add_crt(s1, "crti.o");
}
void tccelf_add_crtend(TCCState *s1)
{
tcc_add_crt(s1, "crtn.o");
}
#endif /* !defined TCC_TARGET_PE */
#ifndef TCC_TARGET_PE
/* add tcc runtime libraries */
void tcc_add_runtime(TCCState *s1)
{
s1->filetype = 0;
tcc_add_pragma_libs(s1);
/* add libc */
if (!s1->nostdlib) {
int lpthread = s1->option_pthread;
if (lpthread)
tcc_add_library(s1, "pthread");
tcc_add_library(s1, "c");
if (TCC_LIBTCC1[0])
tcc_add_support(s1, TCC_LIBTCC1);
if (s1->output_type != TCC_OUTPUT_MEMORY)
tccelf_add_crtend(s1);
}
}
#endif /* ndef TCC_TARGET_PE */
/* add various standard linker symbols (must be done after the
sections are filled (for example after allocating common
symbols)) */
static void tcc_add_linker_symbols(TCCState *s1)
{
char buf[1024];
int i;
Section *s;
set_global_sym(s1, "_etext", text_section, -1);
set_global_sym(s1, "_edata", data_section, -1);
set_global_sym(s1, "_end", bss_section, -1);
/* horrible new standard ldscript defines */
add_init_array_defines(s1, ".preinit_array");
add_init_array_defines(s1, ".init_array");
add_init_array_defines(s1, ".fini_array");
/* add start and stop symbols for sections whose name can be
expressed in C */
for(i = 1; i < s1->nb_sections; i++) {
s = s1->sections[i];
if ((s->sh_flags & SHF_ALLOC)
&& (s->sh_type == SHT_PROGBITS || s->sh_type == SHT_NOBITS
|| s->sh_type == SHT_STRTAB)) {
/* check if section name can be expressed in C */
const char *p0, *p;
p0 = s->name;
if (*p0 == '.')
++p0;
p = p0;
for(;;) {
int c = *p;
if (!c)
break;
if (!isid(c) && !isnum(c))
goto next_sec;
p++;
}
snprintf(buf, sizeof(buf), "__start_%s", p0);
set_global_sym(s1, buf, s, 0);
snprintf(buf, sizeof(buf), "__stop_%s", p0);
set_global_sym(s1, buf, s, -1);
}
next_sec: ;
}
}
void resolve_common_syms(TCCState *s1)
{
ElfW(Sym) *sym;
/* Allocate common symbols in BSS. */
for_each_elem(symtab_section, 1, sym, ElfW(Sym)) {
if (sym->st_shndx == SHN_COMMON) {
/* symbol alignment is in st_value for SHN_COMMONs */
sym->st_value = section_add(bss_section, sym->st_size,
sym->st_value);
sym->st_shndx = bss_section->sh_num;
}
}
/* Now assign linker provided symbols their value. */
tcc_add_linker_symbols(s1);
}
#ifndef ELF_OBJ_ONLY
void fill_got_entry(TCCState *s1, ElfW_Rel *rel)
{
int sym_index = ELFW(R_SYM) (rel->r_info);
ElfW(Sym) *sym = &((ElfW(Sym) *) symtab_section->data)[sym_index];
struct sym_attr *attr = get_sym_attr(s1, sym_index, 0);
unsigned offset = attr->got_offset;
if (0 == offset)
return;
section_reserve(s1->got, offset + PTR_SIZE);
#if PTR_SIZE == 8
write64le(s1->got->data + offset, sym->st_value);
#else
write32le(s1->got->data + offset, sym->st_value);
#endif
}
/* Perform relocation to GOT or PLT entries */
void fill_got(TCCState *s1)
{
Section *s;
ElfW_Rel *rel;
int i;
for(i = 1; i < s1->nb_sections; i++) {
s = s1->sections[i];
if (s->sh_type != SHT_RELX)
continue;
/* no need to handle got relocations */
if (s->link != symtab_section)
continue;
for_each_elem(s, 0, rel, ElfW_Rel) {
switch (ELFW(R_TYPE) (rel->r_info)) {
case R_X86_64_GOT32:
case R_X86_64_GOTPCREL:
case R_X86_64_GOTPCRELX:
case R_X86_64_REX_GOTPCRELX:
case R_X86_64_PLT32:
fill_got_entry(s1, rel);
break;
}
}
}
}
/* See put_got_entry for a description. This is the second stage
where GOT references to local defined symbols are rewritten. */
static void fill_local_got_entries(TCCState *s1)
{
ElfW_Rel *rel;
if (!s1->got->reloc)
return;
for_each_elem(s1->got->reloc, 0, rel, ElfW_Rel) {
if (ELFW(R_TYPE)(rel->r_info) == R_RELATIVE) {
int sym_index = ELFW(R_SYM) (rel->r_info);
ElfW(Sym) *sym = &((ElfW(Sym) *) symtab_section->data)[sym_index];
struct sym_attr *attr = get_sym_attr(s1, sym_index, 0);
unsigned offset = attr->got_offset;
if (offset != rel->r_offset - s1->got->sh_addr)
tcc_error_noabort("fill_local_got_entries: huh?");
rel->r_info = ELFW(R_INFO)(0, R_RELATIVE);
#if SHT_RELX == SHT_RELA
rel->r_addend = sym->st_value;
#else
/* All our REL architectures also happen to be 32bit LE. */
write32le(s1->got->data + offset, sym->st_value);
#endif
}
}
}
/* decide if an unallocated section should be output. */
static int set_sec_sizes(TCCState *s1)
{
int i;
Section *s;
int textrel = 0;
/* Allocate strings for section names */
for(i = 1; i < s1->nb_sections; i++) {
s = s1->sections[i];
if (s->sh_type == SHT_RELX && !(s->sh_flags & SHF_ALLOC)) {
/* when generating a DLL, we include relocations but
we may patch them */
} else if ((s->sh_flags & SHF_ALLOC)) {
s->sh_size = s->data_offset;
}
}
return textrel;
}
/* various data used under elf_output_file() */
struct dyn_inf {
Section *dynamic;
Section *dynstr;
struct {
/* Info to be copied in dynamic section */
unsigned long data_offset;
addr_t rel_addr;
addr_t rel_size;
};
ElfW(Phdr) *phdr;
int phnum;
int shnum;
Section *interp;
Section *note;
Section *gnu_hash;
/* read only segment mapping for GNU_RELRO */
Section _roinf, *roinf;
};
/* Decide the layout of sections loaded in memory. This must be done before
program headers are filled since they contain info about the layout.
We do the following ordering: interp, symbol tables, relocations, progbits,
nobits */
static int sort_sections(TCCState *s1, int *sec_order, struct dyn_inf *d)
{
Section *s;
int i, j, k, f, f0, n;
int nb_sections = s1->nb_sections;
int *sec_cls = sec_order + nb_sections;
for (i = 1; i < nb_sections; i++) {
s = s1->sections[i];
if (0 == s->sh_name) {
j = 0x900; /* no sh_name: won't go to file */
} else if (s->sh_flags & SHF_ALLOC) {
j = 0x100;
if (s->sh_flags & SHF_WRITE)
j = 0x200;
if (s->sh_flags & SHF_TLS)
j += 0x200;
} else {
j = 0x700;
}
if (j >= 0x700 && s1->output_format != TCC_OUTPUT_FORMAT_ELF)
s->sh_size = 0, j = 0x900;
if (s->sh_type == SHT_SYMTAB || s->sh_type == SHT_DYNSYM) {
k = 0x10;
} else if (s->sh_type == SHT_STRTAB && strcmp(s->name, ".stabstr")) {
k = 0x11;
if (i == nb_sections - 1) /* ".shstrtab" assumed to stay last */
k = 0xff;
} else if (s->sh_type == SHT_HASH || s->sh_type == SHT_GNU_HASH) {
k = 0x12;
} else if (s->sh_type == SHT_GNU_verdef
|| s->sh_type == SHT_GNU_verneed
|| s->sh_type == SHT_GNU_versym) {
k = 0x13;
} else if (s->sh_type == SHT_RELX) {
k = 0x20;
if (s1->plt && s == s1->plt->reloc)
k = 0x21;
} else if (s->sh_flags & SHF_EXECINSTR) {
k = 0x30;
/* RELRO sections --> */
} else if (s->sh_type == SHT_PREINIT_ARRAY) {
k = 0x41;
} else if (s->sh_type == SHT_INIT_ARRAY) {
k = 0x42;
} else if (s->sh_type == SHT_FINI_ARRAY) {
k = 0x43;
} else if (s->sh_type == SHT_DYNAMIC) {
k = 0x46;
} else if (s == s1->got) {
k = 0x47; /* .got as RELRO needs BIND_NOW in DT_FLAGS */
} else if (s->reloc && (s->reloc->sh_flags & SHF_ALLOC) && j == 0x100) {
k = 0x44;
/* <-- */
} else if (s->sh_type == SHT_NOTE) {
k = 0x60;
} else if (s->sh_type == SHT_NOBITS) {
k = 0x70; /* bss */
} else if (s == d->interp) {
k = 0x00;
} else {
k = 0x50; /* data */
}
k += j;
if ((k & 0xfff0) == 0x140) {
/* make RELRO section writable */
k += 0x100, s->sh_flags |= SHF_WRITE;
}
for (n = i; n > 1 && k < (f = sec_cls[n - 1]); --n)
sec_cls[n] = f, sec_order[n] = sec_order[n - 1];
sec_cls[n] = k, sec_order[n] = i;
}
sec_order[0] = 0;
d->shnum = 1;
/* count PT_LOAD headers needed */
n = f0 = 0;
for (i = 1; i < nb_sections; i++) {
s = s1->sections[sec_order[i]];
k = sec_cls[i];
f = 0;
if (k < 0x900)
++d->shnum;
if (k < 0x700) {
f = s->sh_flags & (SHF_ALLOC|SHF_WRITE|SHF_EXECINSTR|SHF_TLS);
#if TARGETOS_NetBSD
/* NetBSD only supports 2 PT_LOAD sections.
See: https://blog.netbsd.org/tnf/entry/the_first_report_on_lld */
if ((f & SHF_WRITE) == 0)
f |= SHF_EXECINSTR;
#else
if ((k & 0xfff0) == 0x240) /* RELRO sections */
f |= 1<<4;
#endif
/* start new header when flags changed or relro, but avoid zero memsz */
if (f != f0 && s->sh_size)
f0 = f, ++n, f |= 1<<8;
}
sec_cls[i] = f;
//printf("ph %d sec %02d : %3X %3X %8.2X %04X %s\n", (f>0) * n, i, f, k, s->sh_type, (int)s->sh_size, s->name);
}
return n;
}
static ElfW(Phdr) *fill_phdr(ElfW(Phdr) *ph, int type, Section *s)
{
if (s) {
ph->p_offset = s->sh_offset;
ph->p_vaddr = s->sh_addr;
ph->p_filesz = s->sh_size;
ph->p_align = s->sh_addralign;
}
ph->p_type = type;
ph->p_flags = PF_R;
ph->p_paddr = ph->p_vaddr;
ph->p_memsz = ph->p_filesz;
return ph;
}
/* Assign sections to segments and decide how are sections laid out when loaded
in memory. This function also fills corresponding program headers. */
static int layout_sections(TCCState *s1, int *sec_order, struct dyn_inf *d)
{
Section *s;
addr_t addr, tmp, align, s_align, base;
ElfW(Phdr) *ph = NULL;
int i, f, n, phnum, phfill;
int file_offset;
/* compute number of program headers */
phnum = sort_sections(s1, sec_order, d);
phfill = 0; /* set to 1 to have dll's with a PT_PHDR */
if (d->interp)
phfill = 2;
phnum += phfill;
if (d->note)
++phnum;
if (d->dynamic)
++phnum;
if (eh_frame_hdr_section)
++phnum;
if (d->roinf)
++phnum;
d->phnum = phnum;
d->phdr = tcc_mallocz(phnum * sizeof(ElfW(Phdr)));
file_offset = 0;
if (s1->output_format == TCC_OUTPUT_FORMAT_ELF) {
file_offset = (sizeof(ElfW(Ehdr)) + phnum * sizeof(ElfW(Phdr)) + 3) & -4;
file_offset += d->shnum * sizeof (ElfW(Shdr));
}
s_align = ELF_PAGE_SIZE;
if (s1->section_align)
s_align = s1->section_align;
addr = ELF_START_ADDR;
if (s1->has_text_addr) {
addr = s1->text_addr;
if (0) {
int a_offset, p_offset;
/* we ensure that (addr % ELF_PAGE_SIZE) == file_offset %
ELF_PAGE_SIZE */
a_offset = (int) (addr & (s_align - 1));
p_offset = file_offset & (s_align - 1);
if (a_offset < p_offset)
a_offset += s_align;
file_offset += (a_offset - p_offset);
}
}
base = addr;
/* compute address after headers */
addr += file_offset;
n = 0;
for(i = 1; i < s1->nb_sections; i++) {
s = s1->sections[sec_order[i]];
f = sec_order[i + s1->nb_sections];
align = s->sh_addralign - 1;
if (f == 0) { /* no alloc */
file_offset = (file_offset + align) & ~align;
s->sh_offset = file_offset;
if (s->sh_type != SHT_NOBITS)
file_offset += s->sh_size;
continue;
}
if ((f & 1<<8) && n) {
/* different rwx section flags */
if (s1->output_format == TCC_OUTPUT_FORMAT_ELF) {
/* if in the middle of a page, w e duplicate the page in
memory so that one copy is RX and the other is RW */
if ((addr & (s_align - 1)) != 0)
addr += s_align;
} else {
align = s_align - 1;
}
}
tmp = addr;
addr = (addr + align) & ~align;
file_offset += (int)(addr - tmp);
s->sh_offset = file_offset;
s->sh_addr = addr;
if (f & 1<<8) {
/* set new program header */
ph = &d->phdr[phfill + n];
ph->p_type = PT_LOAD;
ph->p_align = s_align;
ph->p_flags = PF_R;
if (f & SHF_WRITE)
ph->p_flags |= PF_W;
if (f & SHF_EXECINSTR)
ph->p_flags |= PF_X;
if (f & SHF_TLS) {
ph->p_type = PT_TLS;
ph->p_align = align + 1;
}
ph->p_offset = file_offset;
ph->p_vaddr = addr;
if (n == 0) {
/* Make the first PT_LOAD segment include the program
headers itself (and the ELF header as well), it'll
come out with same memory use but will make various
tools like binutils strip work better. */
ph->p_offset = 0;
ph->p_vaddr = base;
}
ph->p_paddr = ph->p_vaddr;
++n;
}
if (f & 1<<4) {
Section *roinf = &d->_roinf;
if (roinf->sh_size == 0) {
roinf->sh_offset = s->sh_offset;
roinf->sh_addr = s->sh_addr;
roinf->sh_addralign = 1;
}
roinf->sh_size = (addr - roinf->sh_addr) + s->sh_size;
}
addr += s->sh_size;
if (s->sh_type != SHT_NOBITS)
file_offset += s->sh_size;
ph->p_filesz = file_offset - ph->p_offset;
ph->p_memsz = addr - ph->p_vaddr;
}
/* Fill other headers */
if (d->note)
fill_phdr(++ph, PT_NOTE, d->note);
if (d->dynamic)
fill_phdr(++ph, PT_DYNAMIC, d->dynamic)->p_flags |= PF_W;
if (eh_frame_hdr_section)
fill_phdr(++ph, PT_GNU_EH_FRAME, eh_frame_hdr_section);
if (d->roinf)
fill_phdr(++ph, PT_GNU_RELRO, d->roinf)->p_flags |= PF_W;
if (d->interp)
fill_phdr(&d->phdr[1], PT_INTERP, d->interp);
if (phfill) {
ph = &d->phdr[0];
ph->p_offset = sizeof(ElfW(Ehdr));
ph->p_vaddr = base + ph->p_offset;
ph->p_filesz = phnum * sizeof(ElfW(Phdr));
ph->p_align = 4;
fill_phdr(ph, PT_PHDR, NULL);
}
return 0;
}
/* put dynamic tag */
static void put_dt(Section *dynamic, int dt, addr_t val)
{
ElfW(Dyn) *dyn;
dyn = section_ptr_add(dynamic, sizeof(ElfW(Dyn)));
dyn->d_tag = dt;
dyn->d_un.d_val = val;
}
/* Fill the dynamic section with tags describing the address and size of
sections */
static void fill_dynamic(TCCState *s1, struct dyn_inf *dyninf)
{
Section *dynamic = dyninf->dynamic;
Section *s;
/* put dynamic section entries */
put_dt(dynamic, DT_HASH, s1->dynsym->hash->sh_addr);
put_dt(dynamic, DT_GNU_HASH, dyninf->gnu_hash->sh_addr);
put_dt(dynamic, DT_STRTAB, dyninf->dynstr->sh_addr);
put_dt(dynamic, DT_SYMTAB, s1->dynsym->sh_addr);
put_dt(dynamic, DT_STRSZ, dyninf->dynstr->data_offset);
put_dt(dynamic, DT_SYMENT, sizeof(ElfW(Sym)));
#if PTR_SIZE == 8
put_dt(dynamic, DT_RELA, dyninf->rel_addr);
put_dt(dynamic, DT_RELASZ, dyninf->rel_size);
put_dt(dynamic, DT_RELAENT, sizeof(ElfW_Rel));
if (s1->plt && s1->plt->reloc) {
put_dt(dynamic, DT_PLTGOT, s1->got->sh_addr);
put_dt(dynamic, DT_PLTRELSZ, s1->plt->reloc->data_offset);
put_dt(dynamic, DT_JMPREL, s1->plt->reloc->sh_addr);
put_dt(dynamic, DT_PLTREL, DT_RELA);
}
put_dt(dynamic, DT_RELACOUNT, 0);
#else
put_dt(dynamic, DT_REL, dyninf->rel_addr);
put_dt(dynamic, DT_RELSZ, dyninf->rel_size);
put_dt(dynamic, DT_RELENT, sizeof(ElfW_Rel));
if (s1->plt && s1->plt->reloc) {
put_dt(dynamic, DT_PLTGOT, s1->got->sh_addr);
put_dt(dynamic, DT_PLTRELSZ, s1->plt->reloc->data_offset);
put_dt(dynamic, DT_JMPREL, s1->plt->reloc->sh_addr);
put_dt(dynamic, DT_PLTREL, DT_REL);
}
put_dt(dynamic, DT_RELCOUNT, 0);
#endif
if (versym_section && verneed_section) {
/* The dynamic linker can not handle VERSYM without VERNEED */
put_dt(dynamic, DT_VERSYM, versym_section->sh_addr);
put_dt(dynamic, DT_VERNEED, verneed_section->sh_addr);
put_dt(dynamic, DT_VERNEEDNUM, dt_verneednum);
}
s = have_section(s1, ".preinit_array");
if (s && s->data_offset) {
put_dt(dynamic, DT_PREINIT_ARRAY, s->sh_addr);
put_dt(dynamic, DT_PREINIT_ARRAYSZ, s->data_offset);
}
s = have_section(s1, ".init_array");
if (s && s->data_offset) {
put_dt(dynamic, DT_INIT_ARRAY, s->sh_addr);
put_dt(dynamic, DT_INIT_ARRAYSZ, s->data_offset);
}
s = have_section(s1, ".fini_array");
if (s && s->data_offset) {
put_dt(dynamic, DT_FINI_ARRAY, s->sh_addr);
put_dt(dynamic, DT_FINI_ARRAYSZ, s->data_offset);
}
s = have_section(s1, ".init");
if (s && s->data_offset) {
put_dt(dynamic, DT_INIT, s->sh_addr);
}
s = have_section(s1, ".fini");
if (s && s->data_offset) {
put_dt(dynamic, DT_FINI, s->sh_addr);
}
put_dt(dynamic, DT_NULL, 0);
}
/* Remove gaps between RELX sections.
These gaps are a result of final_sections_reloc. Here some relocs are removed.
The gaps are then filled with 0 in tcc_output_elf. The 0 is intepreted as
R_...NONE reloc. This does work on most targets but on OpenBSD/arm64 this
is illegal. OpenBSD/arm64 does not support R_...NONE reloc. */
static void update_reloc_sections(TCCState *s1, struct dyn_inf *dyninf)
{
int i;
unsigned long file_offset = 0;
Section *s;
Section *relocplt = s1->plt ? s1->plt->reloc : NULL;
/* dynamic relocation table information, for .dynamic section */
dyninf->rel_addr = dyninf->rel_size = 0;
for(i = 1; i < s1->nb_sections; i++) {
s = s1->sections[i];
if (s->sh_type == SHT_RELX && s != relocplt) {
if (dyninf->rel_size == 0) {
dyninf->rel_addr = s->sh_addr;
file_offset = s->sh_offset;
}
else {
s->sh_addr = dyninf->rel_addr + dyninf->rel_size;
s->sh_offset = file_offset + dyninf->rel_size;
}
dyninf->rel_size += s->sh_size;
}
}
}
#endif /* ndef ELF_OBJ_ONLY */
/* Create an ELF file on disk.
This function handle ELF specific layout requirements */
static int tcc_output_elf(TCCState *s1, FILE *f, int phnum, ElfW(Phdr) *phdr)
{
int i, shnum, offset, size, file_type;
Section *s;
ElfW(Ehdr) ehdr;
ElfW(Shdr) shdr, *sh;
file_type = s1->output_type;
shnum = s1->nb_sections;
memset(&ehdr, 0, sizeof(ehdr));
if (phnum > 0) {
ehdr.e_phentsize = sizeof(ElfW(Phdr));
ehdr.e_phnum = phnum;
ehdr.e_phoff = sizeof(ElfW(Ehdr));
}
/* fill header */
ehdr.e_ident[0] = ELFMAG0;
ehdr.e_ident[1] = ELFMAG1;
ehdr.e_ident[2] = ELFMAG2;
ehdr.e_ident[3] = ELFMAG3;
ehdr.e_ident[4] = ELFCLASSW;
ehdr.e_ident[5] = ELFDATA2LSB;
ehdr.e_ident[6] = EV_CURRENT;
if (file_type == TCC_OUTPUT_OBJ) {
ehdr.e_type = ET_REL;
} else {
ehdr.e_type = ET_EXEC;
if (s1->entryname)
ehdr.e_entry = get_sym_addr(s1, s1->entryname, 1, 0);
else
ehdr.e_entry = get_sym_addr(s1, "_start", !!(file_type & TCC_OUTPUT_EXE), 0);
if (ehdr.e_entry == (addr_t)-1)
ehdr.e_entry = text_section->sh_addr;
if (s1->nb_errors)
return -1;
}
sort_syms(s1, s1->symtab);
ehdr.e_machine = EM_386;
ehdr.e_version = EV_CURRENT;
ehdr.e_shoff = (sizeof(ElfW(Ehdr)) + phnum * sizeof(ElfW(Phdr)) + 3) & -4;
ehdr.e_ehsize = sizeof(ElfW(Ehdr));
ehdr.e_shentsize = sizeof(ElfW(Shdr));
ehdr.e_shnum = shnum;
ehdr.e_shstrndx = shnum - 1;
offset = fwrite(&ehdr, 1, sizeof(ElfW(Ehdr)), f);
if (phdr)
offset += fwrite(phdr, 1, phnum * sizeof(ElfW(Phdr)), f);
/* output section headers */
while (offset < ehdr.e_shoff) {
fputc(0, f);
offset++;
}
for(i = 0; i < shnum; i++) {
sh = &shdr;
memset(sh, 0, sizeof(ElfW(Shdr)));
if (i) {
s = s1->sections[i];
sh->sh_name = s->sh_name;
sh->sh_type = s->sh_type;
sh->sh_flags = s->sh_flags;
sh->sh_entsize = s->sh_entsize;
sh->sh_info = s->sh_info;
if (s->link)
sh->sh_link = s->link->sh_num;
sh->sh_addralign = s->sh_addralign;
sh->sh_addr = s->sh_addr;
sh->sh_offset = s->sh_offset;
sh->sh_size = s->sh_size;
}
offset += fwrite(sh, 1, sizeof(ElfW(Shdr)), f);
}
/* output sections */
for(i = 1; i < s1->nb_sections; i++) {
s = s1->sections[i];
if (s->sh_type != SHT_NOBITS) {
while (offset < s->sh_offset) {
fputc(0, f);
offset++;
}
size = s->sh_size;
if (size)
offset += fwrite(s->data, 1, size, f);
}
}
return 0;
}
static int tcc_output_binary(TCCState *s1, FILE *f)
{
Section *s;
int i, offset, size;
offset = 0;
for(i=1;i<s1->nb_sections;i++) {
s = s1->sections[i];
if (s->sh_type != SHT_NOBITS &&
(s->sh_flags & SHF_ALLOC)) {
while (offset < s->sh_offset) {
fputc(0, f);
offset++;
}
size = s->sh_size;
fwrite(s->data, 1, size, f);
offset += size;
}
}
return 0;
}
/* Write an elf, coff or "binary" file */
static int tcc_write_elf_file(TCCState *s1, const char *filename, int phnum,
ElfW(Phdr) *phdr)
{
int fd, mode, file_type, ret;
FILE *f;
file_type = s1->output_type;
if (file_type == TCC_OUTPUT_OBJ)
mode = 0666;
else
mode = 0777;
unlink(filename);
fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, mode);
if (fd < 0 || (f = fdopen(fd, "wb")) == NULL)
return tcc_error_noabort("could not write '%s: %s'", filename, strerror(errno));
if (s1->verbose)
printf("<- %s\n", filename);
#ifdef TCC_TARGET_COFF
if (s1->output_format == TCC_OUTPUT_FORMAT_COFF)
tcc_output_coff(s1, f);
else
#endif
if (s1->output_format == TCC_OUTPUT_FORMAT_ELF)
ret = tcc_output_elf(s1, f, phnum, phdr);
else
ret = tcc_output_binary(s1, f);
fclose(f);
return ret;
}
#ifndef ELF_OBJ_ONLY
/* order sections according to sec_order, remove sections
that we aren't going to output. */
static void reorder_sections(TCCState *s1, int *sec_order)
{
int i, nnew, k, *backmap;
Section **snew, *s;
ElfW(Sym) *sym;
backmap = tcc_malloc(s1->nb_sections * sizeof(backmap[0]));
for (i = 0, nnew = 0, snew = NULL; i < s1->nb_sections; i++) {
k = sec_order[i];
s = s1->sections[k];
if (!i || s->sh_name) {
backmap[k] = nnew;
dynarray_add(&snew, &nnew, s);
} else {
backmap[k] = 0;
/* just remember to free them later */
dynarray_add(&s1->priv_sections, &s1->nb_priv_sections, s);
}
}
for (i = 1; i < nnew; i++) {
s = snew[i];
s->sh_num = i;
if (s->sh_type == SHT_RELX)
s->sh_info = backmap[s->sh_info];
else if (s->sh_type == SHT_SYMTAB || s->sh_type == SHT_DYNSYM)
for_each_elem(s, 1, sym, ElfW(Sym))
if (sym->st_shndx < s1->nb_sections)
sym->st_shndx = backmap[sym->st_shndx];
}
tcc_free(s1->sections);
s1->sections = snew;
s1->nb_sections = nnew;
tcc_free(backmap);
}
static void alloc_sec_names(TCCState *s1, int is_obj);
/* Output an elf, coff or binary file */
/* XXX: suppress unneeded sections */
static int elf_output_file(TCCState *s1, const char *filename)
{
int i, ret, file_type, *sec_order;
struct dyn_inf dyninf = {0};
Section *interp, *dynstr, *dynamic;
int textrel, got_sym;
file_type = s1->output_type;
s1->nb_errors = 0;
ret = -1;
interp = dynstr = dynamic = NULL;
sec_order = NULL;
dyninf.roinf = &dyninf._roinf;
/* if linking, also link in runtime libraries (libc, libgcc, etc.) */
tcc_add_runtime(s1);
resolve_common_syms(s1);
build_got_entries(s1, 0);
textrel = set_sec_sizes(s1);
/* create and fill .shstrtab section */
alloc_sec_names(s1, 0);
/* this array is used to reorder sections in the output file */
sec_order = tcc_malloc(sizeof(int) * 2 * s1->nb_sections);
/* compute section to program header mapping */
layout_sections(s1, sec_order, &dyninf);
if (dynamic) {
/* put in GOT the dynamic section address and relocate PLT */
write32le(s1->got->data, dynamic->sh_addr);
if (file_type == TCC_OUTPUT_EXE)
relocate_plt(s1);
/* relocate symbols in .dynsym now that final addresses are known */
relocate_syms(s1, s1->dynsym, 2);
}
/* if building executable or DLL, then relocate each section
except the GOT which is already relocated */
relocate_syms(s1, s1->symtab, 0);
if (s1->nb_errors != 0)
goto the_end;
relocate_sections(s1);
if (dynamic) {
update_reloc_sections (s1, &dyninf);
dynamic->data_offset = dyninf.data_offset;
fill_dynamic(s1, &dyninf);
}
/* Perform relocation to GOT or PLT entries */
if (file_type == TCC_OUTPUT_EXE)
fill_got(s1);
else if (s1->got)
fill_local_got_entries(s1);
if (dyninf.gnu_hash)
update_gnu_hash(s1, dyninf.gnu_hash);
reorder_sections(s1, sec_order);
/* Create the ELF file with name 'filename' */
ret = tcc_write_elf_file(s1, filename, dyninf.phnum, dyninf.phdr);
the_end:
tcc_free(sec_order);
tcc_free(dyninf.phdr);
return ret;
}
#endif /* ndef ELF_OBJ_ONLY */
/* Allocate strings for section names */
static void alloc_sec_names(TCCState *s1, int is_obj)
{
int i;
Section *s, *strsec;
strsec = new_section(s1, ".shstrtab", SHT_STRTAB, 0);
put_elf_str(strsec, "");
for(i = 1; i < s1->nb_sections; i++) {
s = s1->sections[i];
if (is_obj)
s->sh_size = s->data_offset;
if (s->sh_size || s == strsec || (s->sh_flags & SHF_ALLOC) || is_obj)
s->sh_name = put_elf_str(strsec, s->name);
}
strsec->sh_size = strsec->data_offset;
}
/* Output an elf .o file */
static int elf_output_obj(TCCState *s1, const char *filename)
{
Section *s;
int i, ret, file_offset;
s1->nb_errors = 0;
/* Allocate strings for section names */
alloc_sec_names(s1, 1);
file_offset = (sizeof (ElfW(Ehdr)) + 3) & -4;
file_offset += s1->nb_sections * sizeof(ElfW(Shdr));
for(i = 1; i < s1->nb_sections; i++) {
s = s1->sections[i];
file_offset = (file_offset + 15) & -16;
s->sh_offset = file_offset;
if (s->sh_type != SHT_NOBITS)
file_offset += s->sh_size;
}
/* Create the ELF file with name 'filename' */
ret = tcc_write_elf_file(s1, filename, 0, NULL);
return ret;
}
int tcc_output_file(TCCState *s, const char *filename)
{
if (s->output_type == TCC_OUTPUT_OBJ)
return elf_output_obj(s, filename);
#ifdef TCC_TARGET_PE
return pe_output_file(s, filename);
#else
return elf_output_file(s, filename);
#endif
}
ssize_t full_read(int fd, void *buf, size_t count) {
char *cbuf = buf;
size_t rnum = 0;
while (1) {
ssize_t num = read(fd, cbuf, count-rnum);
if (num < 0) return num;
if (num == 0) return rnum;
rnum += num;
cbuf += num;
}
}
void *load_data(int fd, unsigned long file_offset, unsigned long size)
{
void *data;
data = tcc_malloc(size);
lseek(fd, file_offset, SEEK_SET);
full_read(fd, data, size);
return data;
}
typedef struct SectionMergeInfo {
Section *s; /* corresponding existing section */
unsigned long offset; /* offset of the new section in the existing section */
uint8_t new_section; /* true if section 's' was added */
uint8_t link_once; /* true if link once section */
} SectionMergeInfo;
int tcc_object_type(int fd, ElfW(Ehdr) *h)
{
int size = full_read(fd, h, sizeof *h);
if (size == sizeof *h && 0 == memcmp(h, ELFMAG, 4)) {
if (h->e_type == ET_REL)
return AFF_BINTYPE_REL;
if (h->e_type == ET_DYN)
return AFF_BINTYPE_DYN;
} else if (size >= 8) {
if (0 == memcmp(h, ARMAG, 8))
return AFF_BINTYPE_AR;
else if (((FILHDR *)h)->f_magic == F_MACH_I386)
return (AFF_BINTYPE_COFF);
}
return 0;
}
/* load an object file and merge it with current files */
/* XXX: handle correctly stab (debug) info */
int tcc_load_object_file(TCCState *s1,
int fd, unsigned long file_offset)
{
ElfW(Ehdr) ehdr;
ElfW(Shdr) *shdr, *sh;
unsigned long size, offset, offseti;
int i, j, nb_syms, sym_index, ret, seencompressed;
char *strsec, *strtab;
int *old_to_new_syms;
char *sh_name, *name;
SectionMergeInfo *sm_table, *sm;
ElfW(Sym) *sym, *symtab;
ElfW_Rel *rel;
Section *s;
lseek(fd, file_offset, SEEK_SET);
if (tcc_object_type(fd, &ehdr) != AFF_BINTYPE_REL)
goto invalid;
/* test CPU specific stuff */
if (ehdr.e_ident[5] != ELFDATA2LSB ||
ehdr.e_machine != EM_386) {
invalid:
return tcc_error_noabort("invalid object file");
}
/* read sections */
shdr = load_data(fd, file_offset + ehdr.e_shoff,
sizeof(ElfW(Shdr)) * ehdr.e_shnum);
sm_table = tcc_mallocz(sizeof(SectionMergeInfo) * ehdr.e_shnum);
/* load section names */
sh = &shdr[ehdr.e_shstrndx];
strsec = load_data(fd, file_offset + sh->sh_offset, sh->sh_size);
/* load symtab and strtab */
old_to_new_syms = NULL;
symtab = NULL;
strtab = NULL;
nb_syms = 0;
seencompressed = 0;
ret = -1;
for(i = 1; i < ehdr.e_shnum; i++) {
sh = &shdr[i];
if (sh->sh_type == SHT_SYMTAB) {
if (symtab) {
tcc_error_noabort("object must contain only one symtab");
goto the_end;
}
nb_syms = sh->sh_size / sizeof(ElfW(Sym));
symtab = load_data(fd, file_offset + sh->sh_offset, sh->sh_size);
sm_table[i].s = symtab_section;
/* now load strtab */
sh = &shdr[sh->sh_link];
strtab = load_data(fd, file_offset + sh->sh_offset, sh->sh_size);
}
if (sh->sh_flags & SHF_COMPRESSED)
seencompressed = 1;
}
/* now examine each section and try to merge its content with the
ones in memory */
for(i = 1; i < ehdr.e_shnum; i++) {
/* no need to examine section name strtab */
if (i == ehdr.e_shstrndx)
continue;
sh = &shdr[i];
if (sh->sh_type == SHT_RELX)
sh = &shdr[sh->sh_info];
/* ignore sections types we do not handle (plus relocs to those) */
sh_name = strsec + sh->sh_name;
if (0 == strncmp(sh_name, ".debug_", 7)
|| 0 == strncmp(sh_name, ".stab", 5)) {
if (seencompressed)
continue;
} else
if (sh->sh_type != SHT_PROGBITS &&
sh->sh_type != SHT_NOTE &&
sh->sh_type != SHT_NOBITS &&
sh->sh_type != SHT_PREINIT_ARRAY &&
sh->sh_type != SHT_INIT_ARRAY &&
sh->sh_type != SHT_FINI_ARRAY
)
continue;
sh = &shdr[i];
sh_name = strsec + sh->sh_name;
if (sh->sh_addralign < 1)
sh->sh_addralign = 1;
/* find corresponding section, if any */
for(j = 1; j < s1->nb_sections;j++) {
s = s1->sections[j];
if (strcmp(s->name, sh_name))
continue;
if (sh->sh_type != s->sh_type
&& strcmp (s->name, ".eh_frame")
) {
tcc_error_noabort("section type conflict: %s %02x <> %02x", s->name, sh->sh_type, s->sh_type);
goto the_end;
}
if (!strncmp(sh_name, ".gnu.linkonce", 13)) {
/* if a 'linkonce' section is already present, we
do not add it again. It is a little tricky as
symbols can still be defined in
it. */
sm_table[i].link_once = 1;
goto next;
}
goto found;
}
/* not found: create new section */
s = new_section(s1, sh_name, sh->sh_type, sh->sh_flags & ~SHF_GROUP);
/* take as much info as possible from the section. sh_link and
sh_info will be updated later */
s->sh_addralign = sh->sh_addralign;
s->sh_entsize = sh->sh_entsize;
sm_table[i].new_section = 1;
found:
/* align start of section */
s->data_offset += -s->data_offset & (sh->sh_addralign - 1);
if (sh->sh_addralign > s->sh_addralign)
s->sh_addralign = sh->sh_addralign;
sm_table[i].offset = s->data_offset;
sm_table[i].s = s;
/* concatenate sections */
size = sh->sh_size;
if (sh->sh_type != SHT_NOBITS) {
unsigned char *ptr;
lseek(fd, file_offset + sh->sh_offset, SEEK_SET);
ptr = section_ptr_add(s, size);
full_read(fd, ptr, size);
} else {
s->data_offset += size;
}
/* align end of section */
/* This is needed if we compile a c file after this */
if (s == text_section || s == data_section || s == rodata_section ||
s == bss_section || s == common_section)
s->data_offset += -s->data_offset & (s->sh_addralign - 1);
next: ;
}
/* second short pass to update sh_link and sh_info fields of new
sections */
for(i = 1; i < ehdr.e_shnum; i++) {
s = sm_table[i].s;
if (!s || !sm_table[i].new_section)
continue;
sh = &shdr[i];
if (sh->sh_link > 0)
s->link = sm_table[sh->sh_link].s;
if (sh->sh_type == SHT_RELX) {
s->sh_info = sm_table[sh->sh_info].s->sh_num;
/* update backward link */
s1->sections[s->sh_info]->reloc = s;
}
}
/* resolve symbols */
old_to_new_syms = tcc_mallocz(nb_syms * sizeof(int));
sym = symtab + 1;
for(i = 1; i < nb_syms; i++, sym++) {
if (sym->st_shndx != SHN_UNDEF &&
sym->st_shndx < SHN_LORESERVE) {
sm = &sm_table[sym->st_shndx];
if (sm->link_once) {
/* if a symbol is in a link once section, we use the
already defined symbol. It is very important to get
correct relocations */
if (ELFW(ST_BIND)(sym->st_info) != STB_LOCAL) {
name = strtab + sym->st_name;
sym_index = find_elf_sym(symtab_section, name);
if (sym_index)
old_to_new_syms[i] = sym_index;
}
continue;
}
/* if no corresponding section added, no need to add symbol */
if (!sm->s)
continue;
/* convert section number */
sym->st_shndx = sm->s->sh_num;
/* offset value */
sym->st_value += sm->offset;
}
/* add symbol */
name = strtab + sym->st_name;
sym_index = set_elf_sym(symtab_section, sym->st_value, sym->st_size,
sym->st_info, sym->st_other,
sym->st_shndx, name);
old_to_new_syms[i] = sym_index;
}
/* third pass to patch relocation entries */
for(i = 1; i < ehdr.e_shnum; i++) {
s = sm_table[i].s;
if (!s)
continue;
sh = &shdr[i];
offset = sm_table[i].offset;
size = sh->sh_size;
switch(s->sh_type) {
case SHT_RELX:
/* take relocation offset information */
offseti = sm_table[sh->sh_info].offset;
for (rel = (ElfW_Rel *) s->data + (offset / sizeof(*rel));
rel < (ElfW_Rel *) s->data + ((offset + size) / sizeof(*rel));
rel++) {
int type;
unsigned sym_index;
/* convert symbol index */
type = ELFW(R_TYPE)(rel->r_info);
sym_index = ELFW(R_SYM)(rel->r_info);
/* NOTE: only one symtab assumed */
if (sym_index >= nb_syms)
goto invalid_reloc;
sym_index = old_to_new_syms[sym_index];
/* ignore link_once in rel section. */
if (!sym_index && !sm_table[sh->sh_info].link_once
) {
invalid_reloc:
tcc_error_noabort("Invalid relocation entry [%2d] '%s' @ %.8x",
i, strsec + sh->sh_name, (int)rel->r_offset);
goto the_end;
}
rel->r_info = ELFW(R_INFO)(sym_index, type);
/* offset the relocation offset */
rel->r_offset += offseti;
}
break;
default:
break;
}
}
ret = 0;
the_end:
tcc_free(symtab);
tcc_free(strtab);
tcc_free(old_to_new_syms);
tcc_free(sm_table);
tcc_free(strsec);
tcc_free(shdr);
return ret;
}
typedef struct ArchiveHeader {
char ar_name[16]; /* name of this member */
char ar_date[12]; /* file mtime */
char ar_uid[6]; /* owner uid; printed as decimal */
char ar_gid[6]; /* owner gid; printed as decimal */
char ar_mode[8]; /* file mode, printed as octal */
char ar_size[10]; /* file size, printed as decimal */
char ar_fmag[2]; /* should contain ARFMAG */
} ArchiveHeader;
#define ARFMAG "`\n"
static unsigned long long get_be(const uint8_t *b, int n)
{
unsigned long long ret = 0;
while (n)
ret = (ret << 8) | *b++, --n;
return ret;
}
static int read_ar_header(int fd, int offset, ArchiveHeader *hdr)
{
char *p, *e;
int len;
lseek(fd, offset, SEEK_SET);
len = full_read(fd, hdr, sizeof(ArchiveHeader));
if (len != sizeof(ArchiveHeader))
return len ? -1 : 0;
if (memcmp(hdr->ar_fmag, ARFMAG, sizeof hdr->ar_fmag))
return -1;
p = hdr->ar_name;
for (e = p + sizeof hdr->ar_name; e > p && e[-1] == ' ';)
--e;
*e = '\0';
hdr->ar_size[sizeof hdr->ar_size-1] = 0;
return len;
}
/* load only the objects which resolve undefined symbols */
static int tcc_load_alacarte(TCCState *s1, int fd, int size, int entrysize)
{
int i, bound, nsyms, sym_index, len, ret = -1;
unsigned long long off;
uint8_t *data;
const char *ar_names, *p;
const uint8_t *ar_index;
ElfW(Sym) *sym;
ArchiveHeader hdr;
data = tcc_malloc(size);
if (full_read(fd, data, size) != size)
goto invalid;
nsyms = get_be(data, entrysize);
ar_index = data + entrysize;
ar_names = (char *) ar_index + nsyms * entrysize;
do {
bound = 0;
for (p = ar_names, i = 0; i < nsyms; i++, p += strlen(p)+1) {
Section *s = symtab_section;
sym_index = find_elf_sym(s, p);
if (!sym_index)
continue;
sym = &((ElfW(Sym) *)s->data)[sym_index];
if(sym->st_shndx != SHN_UNDEF)
continue;
off = get_be(ar_index + i * entrysize, entrysize);
len = read_ar_header(fd, off, &hdr);
if (len <= 0 || memcmp(hdr.ar_fmag, ARFMAG, 2)) {
invalid:
tcc_error_noabort("invalid archive");
goto the_end;
}
off += len;
if (s1->verbose == 2)
printf(" -> %s\n", hdr.ar_name);
if (tcc_load_object_file(s1, fd, off) < 0)
goto the_end;
++bound;
}
} while(bound);
ret = 0;
the_end:
tcc_free(data);
return ret;
}
/* load a '.a' file */
int tcc_load_archive(TCCState *s1, int fd, int alacarte)
{
ArchiveHeader hdr;
/* char magic[8]; */
int size, len;
unsigned long file_offset;
ElfW(Ehdr) ehdr;
/* skip magic which was already checked */
/* full_read(fd, magic, sizeof(magic)); */
file_offset = sizeof ARMAG - 1;
for(;;) {
len = read_ar_header(fd, file_offset, &hdr);
if (len == 0)
return 0;
if (len < 0)
return tcc_error_noabort("invalid archive");
file_offset += len;
size = strtol(hdr.ar_size, NULL, 0);
if (alacarte) {
/* coff symbol table : we handle it */
if (!strcmp(hdr.ar_name, "/"))
return tcc_load_alacarte(s1, fd, size, 4);
if (!strcmp(hdr.ar_name, "/SYM64/"))
return tcc_load_alacarte(s1, fd, size, 8);
} else if (tcc_object_type(fd, &ehdr) == AFF_BINTYPE_REL) {
if (s1->verbose == 2)
printf(" -> %s\n", hdr.ar_name);
if (tcc_load_object_file(s1, fd, file_offset) < 0)
return -1;
}
/* align to even */
file_offset = (file_offset + size + 1) & ~1;
}
}
#ifndef ELF_OBJ_ONLY
struct versym_info {
int nb_versyms;
ElfW(Verdef) *verdef;
ElfW(Verneed) *verneed;
ElfW(Half) *versym;
int nb_local_ver, *local_ver;
};
#define LD_TOK_NAME 256
#define LD_TOK_EOF (-1)
static int ld_inp(TCCState *s1)
{
char b;
if (s1->cc != -1) {
int c = s1->cc;
s1->cc = -1;
return c;
}
if (1 == read(s1->fd, &b, 1))
return b;
return CH_EOF;
}
/* return next ld script token */
static int ld_next(TCCState *s1, char *name, int name_size)
{
int c, d, ch;
char *q;
redo:
ch = ld_inp(s1);
switch(ch) {
case ' ':
case '\t':
case '\f':
case '\v':
case '\r':
case '\n':
goto redo;
case '/':
ch = ld_inp(s1);
if (ch == '*') { /* comment */
for (d = 0;; d = ch) {
ch = ld_inp(s1);
if (ch == CH_EOF || (ch == '/' && d == '*'))
break;
}
goto redo;
} else {
q = name;
*q++ = '/';
goto parse_name;
}
break;
case '\\':
/* case 'a' ... 'z': */
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
case 'u':
case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
/* case 'A' ... 'z': */
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
case 'H':
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case 'Q':
case 'R':
case 'S':
case 'T':
case 'U':
case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
case '_':
case '.':
case '$':
case '~':
q = name;
parse_name:
for(;;) {
if (!((ch >= 'a' && ch <= 'z') ||
(ch >= 'A' && ch <= 'Z') ||
(ch >= '0' && ch <= '9') ||
strchr("/.-_+=$:\\,~", ch)))
break;
if ((q - name) < name_size - 1) {
*q++ = ch;
}
ch = ld_inp(s1);
}
s1->cc = ch;
*q = '\0';
c = LD_TOK_NAME;
break;
case CH_EOF:
c = LD_TOK_EOF;
break;
default:
c = ch;
break;
}
return c;
}
static int ld_add_file(TCCState *s1, const char filename[])
{
if (filename[0] == '/') {
if (CONFIG_SYSROOT[0] == '\0'
&& tcc_add_file_internal(s1, filename, AFF_TYPE_BIN) == 0)
return 0;
filename = tcc_basename(filename);
}
return -1;
}
static int ld_add_file_list(TCCState *s1, const char *cmd, int as_needed)
{
char filename[1024], libname[1016];
int t, group, nblibs = 0, ret = 0;
char **libs = NULL;
group = !strcmp(cmd, "GROUP");
if (!as_needed)
s1->new_undef_sym = 0;
t = ld_next(s1, filename, sizeof(filename));
if (t != '(') {
ret = tcc_error_noabort("( expected");
goto lib_parse_error;
}
t = ld_next(s1, filename, sizeof(filename));
for(;;) {
libname[0] = '\0';
if (t == LD_TOK_EOF) {
ret = tcc_error_noabort("unexpected end of file");
goto lib_parse_error;
} else if (t == ')') {
break;
} else if (t == '-') {
t = ld_next(s1, filename, sizeof(filename));
if ((t != LD_TOK_NAME) || (filename[0] != 'l')) {
ret = tcc_error_noabort("library name expected");
goto lib_parse_error;
}
pstrcpy(libname, sizeof libname, &filename[1]);
snprintf(filename, sizeof filename, "lib%s.a", libname);
} else if (t != LD_TOK_NAME) {
ret = tcc_error_noabort("filename expected");
goto lib_parse_error;
}
if (!strcmp(filename, "AS_NEEDED")) {
ret = ld_add_file_list(s1, cmd, 1);
if (ret)
goto lib_parse_error;
} else {
/* TODO: Implement AS_NEEDED support. */
/* DT_NEEDED is not used any more so ignore as_needed */
if (1 || !as_needed) {
ret = ld_add_file(s1, filename);
if (ret)
goto lib_parse_error;
if (group) {
/* Add the filename *and* the libname to avoid future conversions */
dynarray_add(&libs, &nblibs, tcc_strdup(filename));
if (libname[0] != '\0')
dynarray_add(&libs, &nblibs, tcc_strdup(libname));
}
}
}
t = ld_next(s1, filename, sizeof(filename));
if (t == ',') {
t = ld_next(s1, filename, sizeof(filename));
}
}
if (group && !as_needed) {
while (s1->new_undef_sym) {
int i;
s1->new_undef_sym = 0;
for (i = 0; i < nblibs; i ++)
ld_add_file(s1, libs[i]);
}
}
lib_parse_error:
dynarray_reset(&libs, &nblibs);
return ret;
}
/* interpret a subset of GNU ldscripts to handle the dummy libc.so
files */
int tcc_load_ldscript(TCCState *s1, int fd)
{
char cmd[64];
char filename[1024];
int t, ret;
s1->fd = fd;
s1->cc = -1;
for(;;) {
t = ld_next(s1, cmd, sizeof(cmd));
if (t == LD_TOK_EOF)
return 0;
else if (t != LD_TOK_NAME)
return -1;
if (!strcmp(cmd, "INPUT") ||
!strcmp(cmd, "GROUP")) {
ret = ld_add_file_list(s1, cmd, 0);
if (ret)
return ret;
} else if (!strcmp(cmd, "OUTPUT_FORMAT") ||
!strcmp(cmd, "TARGET")) {
/* ignore some commands */
t = ld_next(s1, cmd, sizeof(cmd));
if (t != '(')
return tcc_error_noabort("( expected");
for(;;) {
t = ld_next(s1, filename, sizeof(filename));
if (t == LD_TOK_EOF) {
return tcc_error_noabort("unexpected end of file");
} else if (t == ')') {
break;
}
}
} else {
return -1;
}
}
return 0;
}
#endif /* !ELF_OBJ_ONLY */