Look up symbols using ELF symbol tables (SHT_SYMTAB, SHT_DYNSYM) · randomstuff/unjit@965db7f · GitHub
[go: up one dir, main page]

Skip to content

Commit 965db7f

Browse files
author
Gabriel Corona
committed
Look up symbols using ELF symbol tables (SHT_SYMTAB, SHT_DYNSYM)
1 parent d8745c6 commit 965db7f

File tree

7 files changed

+260
-13
lines changed

7 files changed

+260
-13
lines changed

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@ include_directories(${LLVM_INCLUDE_DIRS})
66

77
add_executable(unjit
88
src/unjit.cpp src/Process.cpp src/Disassembler.cpp
9-
src/Vma.cpp)
9+
src/Vma.cpp
10+
src/Module.cpp)
1011
add_definitions(-D_XOPEN_SOURCE=700)
1112

1213
# llvm_map_components_to_libnames(llvm_libs support core mcdisassembler native)
1314
# target_link_libraries(unjit ${llvm_libs})
1415

1516
target_link_libraries(unjit LLVM-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR})
17+
target_link_libraries(unjit elf)
1618
set_property(TARGET unjit PROPERTY CXX_STANDARD 11)
1719
set_property(TARGET unjit PROPERTY CXX_STANDARD_REQUIRED ON)

src/Disassembler.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ void Disassembler::disassemble(std::ostream& stream, Symbol const& symbol)
9393
local.iov_base = buffer;
9494
local.iov_len = symbol.size;
9595

96-
remote.iov_base = (void*) symbol.start;
96+
remote.iov_base = (void*) symbol.value;
9797
remote.iov_len = symbol.size;
9898

9999
if (process_vm_readv(this->process_->pid(), &local, 1, &remote, 1, 0) != symbol.size) {
@@ -103,7 +103,7 @@ void Disassembler::disassemble(std::ostream& stream, Symbol const& symbol)
103103
}
104104

105105
stream << symbol.name << '\n';
106-
this->disassemble(stream, buffer, symbol.size, symbol.start);
106+
this->disassemble(stream, buffer, symbol.size, symbol.value);
107107
stream << '\n';
108108
}
109109

src/Module.cpp

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
/* The MIT License (MIT)
2+
3+
Copyright (c) 2015 Gabriel Corona
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in
13+
all copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
THE SOFTWARE.
22+
*/
23+
24+
#include <memory>
25+
26+
#include <unistd.h>
27+
#include <fcntl.h>
28+
29+
#include <libelf.h>
30+
31+
#include "unjit.hpp"
32+
33+
namespace unjit {
34+
35+
/**
 * Owning wrapper around a POSIX file descriptor.
 *
 * The descriptor passed to the constructor is closed when the wrapper is
 * destroyed. A negative value (such as the -1 returned by a failed open())
 * is stored as-is and is never passed to close().
 *
 * The wrapper is neither copyable nor movable: two wrappers around the same
 * descriptor would close it twice.
 */
struct FileDescriptor {
private:
  int fd_;
public:
  /** Take ownership of `fd` (which may be negative, meaning "no descriptor"). */
  explicit FileDescriptor(int fd) : fd_(fd) {}

  ~FileDescriptor()
  {
    // Nothing to release when the descriptor was never valid.
    if (fd_ >= 0)
      close(fd_);
  }

  // The copy operations take a const reference on purpose: with a non-const
  // reference, copying from a const object or from a temporary would not
  // select the deleted overload and would instead silently go through
  // operator int() and the int constructor, duplicating ownership.
  FileDescriptor(FileDescriptor const&) = delete;
  FileDescriptor& operator=(FileDescriptor const&) = delete;

  /** Give access to the raw descriptor, e.g. to pass it to C APIs. */
  operator int() const
  {
    return fd_;
  }
};
51+
52+
class elf_deleter {
53+
public:
54+
void operator()(Elf* elf) const
55+
{
56+
elf_end(elf);
57+
}
58+
};
59+
60+
/**
 * Find the first section of the given type.
 *
 * Walks the sections of `elf` starting at index 0. Each section is checked
 * through its 64-bit header first and then through its 32-bit header, so the
 * caller does not need to know the ELF class.
 *
 * @param elf      libelf descriptor to search
 * @param sh_type  section type to look for (compared against sh_type)
 * @return the first matching section, or nullptr if there is none
 */
static Elf_Scn *elf_scn(Elf *elf, Elf64_Word sh_type)
{
  Elf_Scn *section = elf_getscn(elf, 0);
  while (section) {
    Elf64_Shdr *header64 = elf64_getshdr(section);
    if (header64 != nullptr && header64->sh_type == sh_type)
      return section;

    Elf32_Shdr *header32 = elf32_getshdr(section);
    if (header32 != nullptr && header32->sh_type == sh_type)
      return section;

    section = elf_nextscn(elf, section);
  }
  return nullptr;
}
72+
73+
/**
 * Find the symbol table section of an ELF file.
 *
 * @return the first SHT_SYMTAB section if there is one, otherwise the first
 *         SHT_DYNSYM section, otherwise nullptr
 */
static Elf_Scn *elf_scn_symbol(Elf *elf)
{
  Elf_Scn *symtab = elf_scn(elf, SHT_SYMTAB);
  return symtab ? symtab : elf_scn(elf, SHT_DYNSYM);
}
81+
82+
/**
 * Get the object file type (e_type) of an ELF file, whatever its class.
 *
 * Both the 32-bit and the 64-bit ELF headers are requested, in the same
 * order as before (32-bit lookup first, then 64-bit); the 32-bit header is
 * used when it is available, otherwise the 64-bit one.
 *
 * @param elf libelf descriptor
 * @return the e_type field of the ELF header, or ET_NONE if neither header
 *         could be obtained
 */
static Elf64_Half elf_e_type(Elf *elf)
{
  Elf32_Ehdr* ehdr32 = elf32_getehdr(elf);
  Elf64_Ehdr* ehdr64 = elf64_getehdr(elf);
  if (ehdr32)
    return ehdr32->e_type;
  if (ehdr64)
    return ehdr64->e_type;
  return ET_NONE;
}
94+
95+
std::shared_ptr<Module> load_module(std::string const& name)
96+
{
97+
// Init libelf:
98+
if (elf_version(EV_CURRENT) == EV_NONE) {
99+
std::cerr << "Elf version error\n";
100+
return nullptr;
101+
}
102+
103+
// Open the file:
104+
FileDescriptor fd(open(name.c_str(), O_RDONLY));
105+
if (fd < 0) {
106+
std::cerr << "Could not open file " << name << "\n";
107+
return nullptr;
108+
}
109+
std::unique_ptr<Elf, elf_deleter> elf(elf_begin(fd, ELF_C_READ, nullptr));
110+
if (!elf) {
111+
std::cerr << "Could not open file " << name << " with libelf\n";
112+
return nullptr;
113+
}
114+
115+
// Check if it is a suitable ELF file:
116+
if (elf_kind(elf.get()) != ELF_K_ELF)
117+
return nullptr;
118+
119+
std::shared_ptr<Module> module(new Module());
120+
module->name = name;
121+
122+
Elf64_Half e_type = elf_e_type(elf.get());
123+
switch(e_type) {
124+
case ET_EXEC:
125+
module->absolute_address = true;
126+
break;
127+
case ET_DYN:
128+
module->absolute_address = false;
129+
break;
130+
default:
131+
return nullptr;
132+
}
133+
134+
// Find SHT_SYMTAB ot SHT_DYNSYM (symbol table):
135+
Elf_Scn *symbol_scn = elf_scn_symbol(elf.get());
136+
if (!symbol_scn)
137+
return nullptr;
138+
size_t symbol_index = elf_ndxscn(symbol_scn);
139+
140+
Elf32_Shdr *shdr32 = elf32_getshdr(symbol_scn);
141+
Elf64_Shdr *shdr64 = elf64_getshdr(symbol_scn);
142+
if (!shdr32 && !shdr64)
143+
return nullptr;
144+
Elf64_Word sh_link = shdr64 ? shdr64->sh_link : shdr32->sh_link;
145+
Elf64_Xword sh_entsize = shdr64 ? shdr64->sh_entsize : shdr32->sh_entsize;
146+
Elf64_Xword sh_size = shdr64 ? shdr64->sh_size : shdr32->sh_size;
147+
148+
uint64_t sh_entry_count = sh_size / sh_entsize;
149+
if (!sh_entsize)
150+
return nullptr;
151+
152+
// For each element in the symbol table (we skip the first element with
153+
// is always a NULL entry):
154+
Elf_Data *data = elf_getdata(symbol_scn, NULL);
155+
for (int i = 1; i != sh_entry_count; ++i) {
156+
Elf64_Sym *sym64 = shdr64 ? (Elf64_Sym*) data->d_buf + i : nullptr;
157+
Elf32_Sym *sym32 = shdr64 ? nullptr : (Elf32_Sym*) data->d_buf + i;
158+
159+
Elf64_Word st_name = shdr64 ? sym64->st_name : sym32->st_name;
160+
if (!st_name)
161+
continue;
162+
Elf64_Section st_shndx = shdr64 ? sym64->st_shndx : sym32->st_shndx;
163+
Elf64_Addr st_value = shdr64 ? sym64->st_value : sym32->st_value;
164+
Elf64_Xword st_size = shdr64 ? sym64->st_size : sym32->st_size;
165+
if (st_shndx == SHN_UNDEF || st_shndx == SHN_ABS)
166+
continue;
167+
char *symbol_name = elf_strptr(elf.get(), sh_link, st_name);
168+
if (!symbol_name)
169+
continue;
170+
171+
Symbol symbol;
172+
symbol.value = st_value;
173+
symbol.size = st_size;
174+
symbol.name = std::string(symbol_name);
175+
module->symbols[st_value] = std::move(symbol);
176+
}
177+
178+
return std::move(module);
179+
}
180+
181+
}

src/Process.cpp

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,36 @@ void Process::load_vm_maps()
8686
vma.flags = MAP_SHARED;
8787
vma.offset = strtoll(match[7].str().c_str(), nullptr, 16);
8888
vma.name = match[8];
89-
std::cerr << vma;
9089
this->vmas_.push_back(std::move(vma));
9190
}
9291
}
9392

93+
void Process::load_modules()
94+
{
95+
this->areas_.clear();
96+
size_t n = this->vmas_.size();
97+
for (size_t i = 0; i != n; ++ i) {
98+
99+
Vma const& vma = this->vmas_[i];
100+
if (vma.name.empty() || vma.name[0] == '[')
101+
continue;
102+
103+
// Find the end of the module:
104+
do { ++i; } while (i != n && this->vmas_[i].name == vma.name);
105+
if (this->vmas_[i].name.empty() && i != n)
106+
++i;
107+
std::uint64_t end = this->vmas_[i - 1].end;
108+
109+
std::shared_ptr<Module> module = load_module(vma.name);
110+
111+
ModuleArea area;
112+
area.start = vma.start;
113+
area.end = end;
114+
area.module = std::move(module);
115+
this->areas_.push_back(std::move(area));
116+
}
117+
}
118+
94119
void Process::load_map_file()
95120
{
96121
std::string filename =
@@ -107,20 +132,31 @@ void Process::load_map_file(std::string const& map_file)
107132
while (getline(file, line)) {
108133
Symbol symbol;
109134
int name_index;
110-
if (sscanf(line.c_str(), "%" SCNx64 " %" SCNx64 "%n", &symbol.start, &symbol.size, &name_index) == 2) {
135+
if (sscanf(line.c_str(), "%" SCNx64 " %" SCNx64 "%n", &symbol.value, &symbol.size, &name_index) == 2) {
111136
while (name_index < line.size() && line[name_index] == ' ')
112137
++name_index;
113138
symbol.name = std::string(line.c_str() + name_index);
114-
this->jit_symbols_[symbol.start] = std::move(symbol);
139+
this->jit_symbols_[symbol.value] = std::move(symbol);
115140
}
116141
}
117142
}
118143

119-
const char* Process::lookup_symbol(uint64_t ReferenceValue)
144+
const char* Process::lookup_symbol(std::uint64_t ReferenceValue)
120145
{
121-
auto i = this->jit_symbols_.find(ReferenceValue);
122-
if (i != this->jit_symbols_.end())
123-
return i->second.name.c_str();
146+
{
147+
auto i = this->jit_symbols_.find(ReferenceValue);
148+
if (i != this->jit_symbols_.end())
149+
return i->second.name.c_str();
150+
}
151+
152+
unjit::ModuleArea const* area = this->find_module_area(ReferenceValue);
153+
if (area) {
154+
std::uint64_t relative_address =
155+
area->module->absolute_address ? ReferenceValue : ReferenceValue - area->start;
156+
auto i = area->module->symbols.find(relative_address);
157+
if (i != this->jit_symbols_.end())
158+
return i->second.name.c_str();
159+
}
124160
return NULL;
125161
}
126162

src/Vma.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ std::ostream& operator<<(std::ostream& stream, Vma const& vma)
4343
<< ' ' << '-'
4444
<< ' ' << '-'
4545
<< ' ' << vma.name
46-
<< '\n';
46+
<< '\n' << std::setw(0) << std::dec;
4747
return stream;
4848
}
4949

src/unjit.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@ int main(int argc, const char** argv)
4040
pid_t pid = std::atoll(argv[1]);
4141
unjit::Process process(pid);
4242
process.load_vm_maps();
43+
process.load_modules();
4344
process.load_map_file();
4445

4546
unjit::Disassembler disassembler(process);
4647

47-
// Disassemble:
4848
for (auto const& k : process.jit_symbols())
4949
disassembler.disassemble(std::cout, k.second);
5050

src/unjit.hpp

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ THE SOFTWARE.
3939
namespace unjit {
4040

4141
struct Symbol {
42-
std::uint64_t start;
42+
std::uint64_t value;
4343
std::uint64_t size;
4444
std::string name;
4545
};
@@ -54,16 +54,36 @@ struct Vma {
5454

5555
std::ostream& operator<<(std::ostream& stream, Vma const& vma);
5656

57+
struct Module {
58+
std::string name;
59+
std::unordered_map<std::uint64_t, Symbol> symbols;
60+
bool absolute_address;
61+
};
62+
63+
std::shared_ptr<Module> load_module(std::string const& name);
64+
65+
struct ModuleArea {
66+
std::uint64_t start, end;
67+
std::shared_ptr<Module> module;
68+
69+
bool contains(std::uint64_t address) const
70+
{
71+
return address >= this->start && address < this->end;
72+
}
73+
};
74+
5775
class Process {
5876
private:
5977
pid_t pid_;
6078
std::unordered_map<std::uint64_t, Symbol> jit_symbols_;
6179
std::vector<Vma> vmas_;
80+
std::vector<ModuleArea> areas_;
6281

6382
public:
6483
Process(pid_t pid);
6584
~Process();
6685
void load_vm_maps();
86+
void load_modules();
6787
void load_map_file();
6888
void load_map_file(std::string const& map_file);
6989
const char* lookup_symbol(uint64_t ReferenceValue);
@@ -83,6 +103,14 @@ class Process {
83103
return vmas_;
84104
}
85105

106+
/** Find the first module area which contains `address`.
 *
 *  @return a pointer to the area (owned by this Process), or nullptr if no
 *          area contains this address
 */
ModuleArea const* find_module_area(std::uint64_t address) const
{
  for (auto it = areas_.begin(); it != areas_.end(); ++it) {
    if (it->contains(address))
      return &*it;
  }
  return nullptr;
}
113+
86114
};
87115

88116
class Disassembler {

0 commit comments

Comments
 (0)
0