Support bytecode format for the 3.6 release (#2) · tempbottle/pythonvm-rust@311420a · GitHub
[go: up one dir, main page]

Skip to content

Commit 311420a

Browse files
authored
Support bytecode format for the 3.6 release (progval#2)
* Start supporting wordcode.
* Named arguments are better.
* Fully debug wordcode.
* Support extended opcodes.
* Add NOPs again, to fix the numbering.
* Fix previous commit.
* Various stuff.
* Working on 3.6 support...
* Fix indent.
* Fix SetupExcept.
* Implement CallFunctionKw.
* Fix test to use Python 3.6 bytecode.
1 parent fedc0ac commit 311420a

File tree

8 files changed

+125
-71
lines changed

8 files changed

+125
-71
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ rust:
44
- beta
55
- nightly
66
before_script:
7-
- wget https://github.com/python/cpython/archive/3.5.zip -O cpython.zip
7+
- wget https://github.com/python/cpython/archive/3.6.zip -O cpython.zip
88
- unzip cpython.zip
99
- cd cpython-*
1010
- ./configure --prefix=$HOME/.local/

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ A Python virtual machine, written in Rust.
2323

2424
## Dependencies
2525

26-
* CPython 3.6 (used as a parser and bytecode compiler). Older versions down to 3.4 should work, but their support is not tested.
26+
* CPython 3.6 (used as a parser and bytecode compiler).
2727
* [Rust](https://www.rust-lang.org/downloads.html)
2828
* [Cargo](https://crates.io/install)
2929

src/marshal/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@ pub fn check_magic(buf: &[u8]) -> bool {
1818
false
1919
}
2020
else {
21-
3310 <= version /* ≥ 3.4rc2 */ && version < 3390 /* < 3.7 */
21+
3379 <= version /* ≥ 3.6rc1 */ && version < 3390 /* < 3.7 */
2222
}
2323
}

src/primitives/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,9 @@ fn build_class<EP: EnvProxy>(state: &mut State<EP>, call_stack: &mut Vec<Frame>,
8989
let mut instructions: Vec<Instruction> = InstructionDecoder::new(code.code.iter()).collect();
9090

9191
// Hack to make the class' code return the class instead of None
92-
assert_eq!(instructions.pop(), Some(Instruction::ReturnValue));
92+
let mut last_instruction;
93+
while {last_instruction = instructions.pop(); last_instruction == Some(Instruction::Nop)} {};
94+
assert_eq!(last_instruction, Some(Instruction::ReturnValue));
9395
instructions.pop(); // LoadConst None
9496
instructions.push(Instruction::PushImmediate(cls_ref.clone()));
9597
instructions.push(Instruction::ReturnValue);

src/processor/instructions.rs

Lines changed: 58 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pub enum CmpOperator {
1818
}
1919

2020
impl CmpOperator {
21-
pub fn from_bytecode(n: u32) -> Self {
21+
pub fn from_bytecode(n: usize) -> Self {
2222
match n {
2323
0 => CmpOperator::Lt,
2424
1 => CmpOperator::Leq,
@@ -69,9 +69,10 @@ pub enum Instruction {
6969
LoadFast(usize),
7070
StoreFast(usize),
7171
LoadGlobal(usize),
72-
CallFunction(usize, usize), // nb_args, nb_kwargs
73-
RaiseVarargs(u16),
74-
MakeFunction(usize, usize, usize), // nb_default_args, nb_default_kwargs, nb_annot
72+
CallFunction(usize, bool), // nb_args + nb_kwargs, has_kwargs
73+
RaiseVarargs(usize),
74+
MakeFunction { has_defaults: bool, has_kwdefaults: bool, has_annotations: bool, has_closure: bool },
75+
BuildConstKeyMap(usize),
7576
}
7677

7778
#[derive(Debug)]
@@ -123,55 +124,65 @@ impl<'a, I> Iterator for InstructionDecoder<I> where I: Iterator<Item=&'a u8> {
123124
self.pending_nops -= 1;
124125
return Some(Instruction::Nop)
125126
};
126-
self.bytestream.next().map(|opcode| {
127-
match *opcode {
128-
1 => Instruction::PopTop,
129-
4 => Instruction::DupTop,
130-
25 => Instruction::BinarySubscr,
131-
68 => Instruction::GetIter,
132-
71 => Instruction::LoadBuildClass,
133-
83 => Instruction::ReturnValue,
134-
87 => Instruction::PopBlock,
135-
88 => Instruction::EndFinally,
136-
89 => Instruction::PopExcept,
137-
90 => Instruction::StoreName(self.read_argument() as usize),
138-
93 => Instruction::ForIter(self.read_argument() as usize),
139-
95 => Instruction::StoreAttr(self.read_argument() as usize),
140-
97 => Instruction::StoreGlobal(self.read_argument() as usize),
141-
100 => Instruction::LoadConst(self.read_argument() as usize),
142-
101 => Instruction::LoadName(self.read_argument() as usize),
143-
102 => Instruction::BuildTuple(self.read_argument() as usize),
144-
106 => Instruction::LoadAttr(self.read_argument() as usize),
145-
107 => Instruction::CompareOp(CmpOperator::from_bytecode(self.read_argument())),
146-
110 => Instruction::JumpForward(self.read_argument() as usize + 2), // +2, because JumpForward takes 3 bytes, and the relative address is computed from the next instruction.
147-
113 => Instruction::JumpAbsolute(self.read_argument() as usize),
148-
114 => Instruction::PopJumpIfFalse(self.read_argument() as usize),
149-
116 => Instruction::LoadGlobal(self.read_argument() as usize),
150-
120 => Instruction::SetupLoop(self.read_argument() as usize + 2),
151-
121 => Instruction::SetupExcept(self.read_argument() as usize + 2),
152-
124 => Instruction::LoadFast(self.read_argument() as usize),
153-
125 => Instruction::StoreFast(self.read_argument() as usize),
154-
130 => Instruction::RaiseVarargs(self.read_argument() as u16),
155-
131 => Instruction::CallFunction(self.read_byte() as usize, self.read_byte() as usize),
156-
132 => {
157-
let arg = self.read_argument();
158-
let nb_pos = arg & 0xFF;
159-
let nb_kw = (arg >> 8) & 0xFF;
160-
//let nb_annot = (arg >> 16) & 0x7FF; // TODO
161-
let nb_annot = 0;
162-
Instruction::MakeFunction(nb_pos as usize, nb_kw as usize, nb_annot as usize)
163-
},
164-
144 => { self.arg_prefix = Some(self.read_argument()); Instruction::Nop },
165-
_ => panic!(format!("Opcode not supported: {}", opcode)),
127+
let mut opcode = 144;
128+
let mut oparg: usize = 0;
129+
while opcode == 144 {
130+
match self.bytestream.next() {
131+
Some(op) => { opcode = *op },
132+
None => return None,
166133
}
167-
})
134+
oparg = (oparg << 8) | (*self.bytestream.next().unwrap() as usize);
135+
self.pending_nops += 1;
136+
}
137+
self.pending_nops -= 1;
138+
let inst = match opcode {
139+
1 => Instruction::PopTop,
140+
4 => Instruction::DupTop,
141+
25 => Instruction::BinarySubscr,
142+
68 => Instruction::GetIter,
143+
71 => Instruction::LoadBuildClass,
144+
83 => Instruction::ReturnValue,
145+
87 => Instruction::PopBlock,
146+
88 => Instruction::EndFinally,
147+
89 => Instruction::PopExcept,
148+
90 => Instruction::StoreName(oparg),
149+
93 => Instruction::ForIter(oparg),
150+
95 => Instruction::StoreAttr(oparg),
151+
97 => Instruction::StoreGlobal(oparg),
152+
100 => Instruction::LoadConst(oparg),
153+
101 => Instruction::LoadName(oparg),
154+
102 => Instruction::BuildTuple(oparg),
155+
106 => Instruction::LoadAttr(oparg),
156+
107 => Instruction::CompareOp(CmpOperator::from_bytecode(oparg)),
157+
110 => Instruction::JumpForward(oparg),
158+
113 => Instruction::JumpAbsolute(oparg),
159+
114 => Instruction::PopJumpIfFalse(oparg),
160+
116 => Instruction::LoadGlobal(oparg),
161+
120 => Instruction::SetupLoop(oparg + 1),
162+
121 => Instruction::SetupExcept(oparg + 1),
163+
124 => Instruction::LoadFast(oparg),
164+
125 => Instruction::StoreFast(oparg),
165+
130 => Instruction::RaiseVarargs(oparg),
166+
131 => Instruction::CallFunction(oparg, false),
167+
132 => Instruction::MakeFunction {
168+
has_defaults: oparg & 0x01 != 0,
169+
has_kwdefaults: oparg & 0x02 != 0,
170+
has_annotations: oparg & 0x04 != 0,
171+
has_closure: oparg & 0x08 != 0,
172+
},
173+
141 => Instruction::CallFunction(oparg, true),
174+
156 => Instruction::BuildConstKeyMap(oparg),
175+
144 => panic!("The impossible happened."),
176+
_ => panic!(format!("Opcode not supported: {:?}", (opcode, oparg))),
177+
};
178+
Some(inst)
168179
}
169180
}
170181

171182
#[test]
172183
fn test_load_read() {
173-
let bytes: Vec<u8> = vec![124, 1, 0, 83];
184+
let bytes: Vec<u8> = vec![124, 1, 83, 0];
174185
let reader = InstructionDecoder::new(bytes.iter());
175186
let instructions: Vec<Instruction> = reader.collect();
176-
assert_eq!(vec![Instruction::LoadFast(1), Instruction::Nop, Instruction::Nop, Instruction::ReturnValue], instructions);
187+
assert_eq!(vec![Instruction::LoadFast(1), Instruction::ReturnValue], instructions);
177188
}

src/processor/mod.rs

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ use super::state::{State, PyResult, unwind, raise, return_value};
1515
use super::sandbox::EnvProxy;
1616
use super::primitives;
1717

18+
const WORD_SIZE: usize = 2;
19+
1820
#[derive(Debug)]
1921
pub enum ProcessorError {
2022
CircularReference,
@@ -233,11 +235,12 @@ fn run_code<EP: EnvProxy>(state: &mut State<EP>, call_stack: &mut Vec<Frame>) ->
233235
let instruction = py_unwrap!(state, frame.instructions.get(frame.program_counter), ProcessorError::InvalidProgramCounter);
234236
// Useful for debugging:
235237
/*
236-
println!("");
238+
println!("======");
237239
for r in frame.var_stack.iter() {
238240
println!("{}", r.repr(&state.store));
239241
}
240-
println!("{} {:?}", frame.program_counter, instruction);
242+
println!("{} {:?}", frame.program_counter*WORD_SIZE, instruction);
243+
println!("======");
241244
*/
242245
frame.program_counter += 1;
243246
instruction.clone()
@@ -362,7 +365,7 @@ fn run_code<EP: EnvProxy>(state: &mut State<EP>, call_stack: &mut Vec<Frame>) ->
362365
Instruction::ForIter(i) => {
363366
let iterator = {
364367
let frame = call_stack.last_mut().unwrap();
365-
frame.block_stack.push(Block::ExceptPopGoto(state.primitive_objects.stopiteration.clone(), 1, frame.program_counter+i));
368+
frame.block_stack.push(Block::ExceptPopGoto(state.primitive_objects.stopiteration.clone(), 1, frame.program_counter+i/WORD_SIZE));
366369
let iterator = top_stack!(state, frame.var_stack);
367370
iterator.clone()
368371
};
@@ -436,7 +439,7 @@ fn run_code<EP: EnvProxy>(state: &mut State<EP>, call_stack: &mut Vec<Frame>) ->
436439
}
437440
Instruction::SetupExcept(i) => {
438441
let frame = call_stack.last_mut().unwrap();
439-
frame.block_stack.push(Block::TryExcept(frame.program_counter, frame.program_counter+i))
442+
frame.block_stack.push(Block::TryExcept(frame.program_counter, frame.program_counter+i/WORD_SIZE))
440443
}
441444
Instruction::CompareOp(CmpOperator::Eq) => {
442445
let frame = call_stack.last_mut().unwrap();
@@ -465,11 +468,11 @@ fn run_code<EP: EnvProxy>(state: &mut State<EP>, call_stack: &mut Vec<Frame>) ->
465468
}
466469
Instruction::JumpAbsolute(target) => {
467470
let frame = call_stack.last_mut().unwrap();
468-
frame.program_counter = target
471+
frame.program_counter = target / WORD_SIZE
469472
}
470473
Instruction::JumpForward(delta) => {
471474
let frame = call_stack.last_mut().unwrap();
472-
frame.program_counter += delta
475+
frame.program_counter += delta / WORD_SIZE
473476
}
474477
Instruction::LoadFast(i) => {
475478
let frame = call_stack.last_mut().unwrap();
@@ -487,7 +490,7 @@ fn run_code<EP: EnvProxy>(state: &mut State<EP>, call_stack: &mut Vec<Frame>) ->
487490
let obj = state.store.deref(&pop_stack!(state, frame.var_stack));
488491
match obj.content {
489492
ObjectContent::True => (),
490-
ObjectContent::False => frame.program_counter = target,
493+
ObjectContent::False => frame.program_counter = target / WORD_SIZE,
491494
_ => unimplemented!(),
492495
}
493496
}
@@ -509,21 +512,32 @@ fn run_code<EP: EnvProxy>(state: &mut State<EP>, call_stack: &mut Vec<Frame>) ->
509512
panic!("Bad RaiseVarargs argument") // TODO: Raise an exception instead
510513
}
511514

512-
Instruction::CallFunction(nb_args, nb_kwargs) => {
515+
Instruction::CallFunction(nb_args, has_kwargs) => {
513516
// See “Call constructs” at:
514517
// http://security.coverity.com/blog/2014/Nov/understanding-python-bytecode.html
515-
let kwargs;
518+
let kwargs: Vec<(ObjectRef, ObjectRef)>;
516519
let args;
517520
let func;
518521
{
519522
let frame = call_stack.last_mut().unwrap();
520-
kwargs = py_unwrap!(state, frame.var_stack.pop_n_pairs(nb_kwargs), ProcessorError::StackTooSmall);
521-
args = py_unwrap!(state, frame.var_stack.pop_many(nb_args), ProcessorError::StackTooSmall);
523+
if has_kwargs {
524+
let ref obj = state.store.deref(&pop_stack!(state, frame.var_stack)).content;
525+
let names: Vec<ObjectRef> = match obj {
526+
&ObjectContent::Tuple(ref v) => v.into_iter().cloned().collect(),
527+
_ => panic!("Bad CallFunctionKw argument"),
528+
};
529+
let values: Vec<ObjectRef> = frame.var_stack.pop_many(names.len()).unwrap();
530+
kwargs = names.into_iter().zip(values).collect();
531+
}
532+
else {
533+
kwargs = Vec::new();
534+
}
535+
args = py_unwrap!(state, frame.var_stack.pop_many(nb_args - kwargs.len()), ProcessorError::StackTooSmall);
522536
func = pop_stack!(state, frame.var_stack);
523537
}
524538
call_function(state, call_stack, &func, args, kwargs)
525539
},
526-
Instruction::MakeFunction(0, nb_default_kwargs, 0) => {
540+
Instruction::MakeFunction { has_defaults: false, has_kwdefaults, has_annotations: false, has_closure: false } => {
527541
// TODO: consume default arguments and annotations
528542
let obj = {
529543
let frame = call_stack.last_mut().unwrap();
@@ -540,18 +554,35 @@ fn run_code<EP: EnvProxy>(state: &mut State<EP>, call_stack: &mut Vec<Frame>) ->
540554
};
541555
let frame = call_stack.last_mut().unwrap();
542556
let code = pop_stack!(state, frame.var_stack);
543-
let raw_kwdefaults = py_unwrap!(state, frame.var_stack.pop_n_pairs(nb_default_kwargs), ProcessorError::StackTooSmall);
544557
let mut kwdefaults: HashMap<String, ObjectRef> = HashMap::new();
545-
kwdefaults.reserve(nb_default_kwargs);
546-
for (key, value) in raw_kwdefaults {
547-
match state.store.deref(&key).content {
548-
ObjectContent::String(ref s) => { kwdefaults.insert(s.clone(), value); },
549-
_ => panic!("Defaults' keys must be strings."),
558+
if has_kwdefaults {
559+
let obj = state.store.deref(&pop_stack!(state, frame.var_stack)).content.clone(); // TODO: clone only if necessary
560+
let raw_kwdefaults = match obj {
561+
ObjectContent::Dict(ref d) => d,
562+
_ => panic!("bad type for default kwd"),
563+
};
564+
kwdefaults.reserve(raw_kwdefaults.len());
565+
for &(ref key, ref value) in raw_kwdefaults {
566+
match state.store.deref(&key).content {
567+
ObjectContent::String(ref s) => { kwdefaults.insert(s.clone(), value.clone()); },
568+
_ => panic!("Defaults' keys must be strings."),
569+
}
550570
}
551571
}
552572
let func = state.primitive_objects.new_function(func_name, frame.object.module(&state.store), code, kwdefaults);
553573
frame.var_stack.push(state.store.allocate(func))
554574
},
575+
Instruction::BuildConstKeyMap(size) => {
576+
let frame = call_stack.last_mut().unwrap();
577+
let obj = state.store.deref(&pop_stack!(state, frame.var_stack)).content.clone(); // TODO: clone only if necessary
578+
let keys: Vec<ObjectRef> = match obj {
579+
ObjectContent::Tuple(ref v) => v.clone(),
580+
_ => panic!("bad BuildConstKeyMap keys argument."),
581+
};
582+
let values: Vec<ObjectRef> = frame.var_stack.peek(size).unwrap().iter().map(|r| (*r).clone()).collect();
583+
let dict = state.primitive_objects.new_dict(keys.into_iter().zip(values).collect());
584+
frame.var_stack.push(state.store.allocate(dict))
585+
}
555586
_ => panic!(format!("todo: instruction {:?}", instruction)),
556587
}
557588
};

src/varstack.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub trait VarStack : Debug {
99
fn push(&mut self, value: Self::Item);
1010
fn pop_all_and_get_n_last(&mut self, nb: usize) -> Option<Vec<Self::Item>>;
1111
fn pop_n_pairs(&mut self, nb: usize) -> Option<Vec<(Self::Item, Self::Item)>>;
12+
fn peek(&self, nb: usize) -> Option<Vec<&Self::Item>>;
1213
}
1314

1415
#[derive(Debug)]
@@ -75,4 +76,13 @@ impl<Item: Clone> VarStack for VectorVarStack<Item> where Item: Debug {
7576
pairs
7677
})
7778
}
79+
fn peek(&self, nb: usize) -> Option<Vec<&Self::Item>> {
80+
if nb > self.vector.len() {
81+
None
82+
}
83+
else {
84+
let length = self.vector.len();
85+
Some(self.vector[(length-nb)..length].iter().collect())
86+
}
87+
}
7888
}

tests/test_basic.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@ use pythonvm::{MockEnvProxy, PyResult, run_file};
66

77
#[test]
88
fn test_hello_world() {
9-
let mut reader: &[u8] = b"\xee\x0c\r\n\xb0\x92\x0fW\x15\x00\x00\x00\xe3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00@\x00\x00\x00s\x0e\x00\x00\x00e\x00\x00d\x00\x00\x83\x01\x00\x01d\x01\x00S)\x02z\x0bHello worldN)\x01\xda\x05print\xa9\x00r\x02\x00\x00\x00r\x02\x00\x00\x00\xfa\x16examples/helloworld.py\xda\x08<module>\x01\x00\x00\x00s\x00\x00\x00\x00";
9+
let mut reader: &[u8] = b"3\r\r\n\xe1\xc8\xf4Y\x15\x00\x00\x00\xe3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00@\x00\x00\x00s\x0c\x00\x00\x00e\x00d\x00\x83\x01\x01\x00d\x01S\x00)\x02z\x0bhello worldN)\x01\xda\x05print\xa9\x00r\x02\x00\x00\x00r\x02\x00\x00\x00\xfa\x16examples/helloworld.py\xda\x08<module>\x01\x00\x00\x00s\x00\x00\x00\x00";
1010
let mut path = PathBuf::new();
1111
path.push(env::current_dir().unwrap());
1212
path.push("pythonlib/");
1313
let envproxy = MockEnvProxy::new(path);
1414
let (processor, result) = run_file(&mut reader, envproxy).unwrap();
1515
if let PyResult::Return(_) = result {
16-
assert_eq!(*processor.envproxy.stdout_content.lock().unwrap(), b"Hello world\n");
16+
assert_eq!(*processor.envproxy.stdout_content.lock().unwrap(), b"hello world\n");
1717
}
1818
else {
1919
panic!(format!("Exited with: {:?}", result))

0 commit comments

Comments
 (0)
0