feat(eval): vllm-based ds1000 · bigcode-project/selfcodealign@d7b96df · GitHub

Commit d7b96df

feat(eval): vllm-based ds1000

1 parent cb66d7e commit d7b96df
1 file changed: +264 −0 lines changed

evaluation/ds_1000.py

Lines changed: 264 additions & 0 deletions
@@ -0,0 +1,264 @@
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, Literal, cast
from transformers import AutoTokenizer
from ds1000 import DS1000Dataset, DS1000Problem
from tqdm.auto import tqdm
from transformers import HfArgumentParser

from star_align.llm_wrapper import (
    GenerationConfig,
    ModelContext,
    create_infilling_prompt,
    get_model_context,
)
from star_align.utils import infer_prompt_template

from vllm import LLM, SamplingParams

PROMPT = cast(str, None)
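# PROMPT is filled in by main() from the INFER environment variable via
# infer_prompt_template; Completion mode requires it to be set.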


@dataclass
class Args:
    dataset_path: str
    model_key: str
    model_name_or_path: str
    mode: Literal["Insertion", "Completion"]
    output_dir: str

    temperature: float = field(default=0.2)
    top_p: float = field(default=0.95)
    max_length: int = field(default=1024)
    n_samples_per_batch: int = field(default=5)
    n_batches: int = field(default=8)

    # Retained for the older ModelContext path (now commented out in generate()).
    def to_generation_config(self) -> GenerationConfig:
        return GenerationConfig(
            # Use max_length to control the budget; max_new_tokens is
            # effectively unbounded.
            max_new_tokens=9999999999999,
            top_p=self.top_p,
            temperature=self.temperature,
            max_length=self.max_length,
        )


def postprocess(text: str) -> str:
    # Keep only the text before the first closing code fence.
    return text.split("```")[0]


def create_prompt(args: Args, tokenizer: AutoTokenizer, problem: DS1000Problem) -> str:
    prompt = problem["prompt"]
    if args.mode == "Insertion":
        prompt = preprocess_insertion_prompt(prompt)
        assert prompt.count("[insert]") == 1
        prefix, suffix = prompt.split("[insert]")
        prompt = create_infilling_prompt(
            model_key=args.model_key,
            prefix=prefix,
            suffix=suffix,
            tokenizer=tokenizer,
        )
    else:
        assert args.mode == "Completion"
        instruction, response_prefix = preprocess_completion_prompt(problem["prompt"])
        prompt = PROMPT.format(
            instruction=instruction,
            response=response_prefix,
        )
    return prompt
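

# generate() samples all completions for one problem in a single vLLM call
# (n = n_batches * n_samples_per_batch), writes each postprocessed sample to
# {output_dir}/{model_key}/{lib}/{mode}/q{problem_id}/{index}.py, and touches a
# FINISHED marker so that reruns can skip problems that already completed.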
def generate(
    args: Args,
    # model_context: ModelContext,
    engine: LLM,
    problem: DS1000Problem,
):
    lib: str = problem["lib"]
    model_key = args.model_key.replace("/", "-")
    problem_id: str = f"q{problem.problem_id}"
    path = Path(args.output_dir) / model_key / lib / args.mode / problem_id
    finishing_signal = path / "FINISHED"
    if finishing_signal.exists():
        print("Skipping:", path)
        return
    if not path.exists():
        print("Making directory:", path)
        path.mkdir(parents=True, exist_ok=True)
    # config = args.to_generation_config()
    prompt = create_prompt(args, engine.get_tokenizer(), problem)
    print("========PROMPT=======")
    print(prompt)
    print("========PROMPT=======")

    sampling_params = SamplingParams(
        n=args.n_batches * args.n_samples_per_batch,
        temperature=args.temperature,
        max_tokens=args.max_length,
        top_k=-1,
        top_p=args.top_p,
        stop=["```"],
    )

    # for batch_idx in range(args.n_batches):
    #     print(f"Generating batch {batch_idx} of {args.n_batches}")
    #     response = model_context.complete(
    #         config=config,
    #         prompts=[prompt] * args.n_samples_per_batch,
    #         stop_tokens=["```"] if os.getenv("STOP") is not None else None,
    #     )
    print(f"Generating {args.n_batches * args.n_samples_per_batch} samples")
    results = engine.generate(prompt, sampling_params)
    assert len(results) == 1
    print("=======RESPONSE[-1]=======")
    # postprocess_fn: Callable[[str], str] = (
    #     (lambda x: x) if args.mode == "Insertion" else postprocess
    # )
    postprocess_fn = postprocess
    print(postprocess_fn(results[0].outputs[-1].text))
    # print("=======RESPONSE[-1]=======")
    # print("=======RESPONSE[RAW]=======")
    # print(response.decoded_outputs[-1])
    # print("=======RESPONSE[RAW]=======")
    # exit()
    assert len(results[0].outputs) == args.n_batches * args.n_samples_per_batch
    for idx, output in enumerate(results[0].outputs):
        sample = output.text
        sample = postprocess_fn(sample)
        # global_index = batch_idx * args.n_samples_per_batch + idx
        global_index = idx
        output_file = path / f"{global_index}.py"
        output_file.write_text(sample)
    finishing_signal.touch()
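

# DS-1000 Completion prompts come in three layouts: a single open <code> block
# ending in "### BEGIN SOLUTION", two <code> blocks with a plain "BEGIN SOLUTION"
# marker, and function-style problems ending in "# SOLUTION START". The helper
# below normalizes all three into an instruction plus a response prefix.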
def preprocess_completion_prompt(prompt: str) -> tuple[str, str]:
    """Preprocess the DS-1000 prompt (Completion mode) into an instruction and a response prefix."""
    # hit = False
    if "SOLUTION START" not in prompt:
        answer_index = prompt.rindex("A:")
        answer = prompt[answer_index + 2 :].strip()
        instruction: str = prompt[:answer_index].strip()
        if instruction.startswith("Problem:"):
            instruction = instruction[len("Problem:") :].strip()
        if "### BEGIN SOLUTION" in prompt:
            assert prompt.count("<code>") == 1
            assert prompt.count("</code>") == 0
            lines = answer.splitlines(keepends=True)
            return_line, result_line, begin_line = lines[-3:]
            assert return_line.strip().startswith("# return")
            assert result_line.strip().startswith("# ")
            assert begin_line.strip() == "### BEGIN SOLUTION"
            response = "".join(lines[:-3]).strip()
            hint = begin_line.replace("###", "#").replace("BEGIN SOLUTION", "Solution")
            response += f"\n{hint}\n"
        else:
            assert "BEGIN SOLUTION" in prompt
            assert prompt.count("<code>") == 2
            assert prompt.count("</code>") == 1
            first_block_start = prompt.index("<code>")
            first_block_end = prompt.index("</code>")
            second_block_start = prompt.index("<code>", first_block_start + 1)
            assert first_block_end < second_block_start
            lines = answer.splitlines(keepends=True)
            block_end, instruction_line, begin_line, block_start = lines[-4:]
            assert begin_line.strip() == "BEGIN SOLUTION"
            assert block_start.strip() == "<code>"
            if block_end.strip() != "</code>":
                if lines[-6].strip() == "</code>":
                    response_prefix = lines[:-6]
                    starting_lines = lines[-5:-2]
                else:
                    assert instruction_line.strip() == "</code>"
                    response_prefix = lines[:-3]
                    # Empty slice: no starting lines to carry over in this case.
                    starting_lines = lines[-2:-2]
            else:
                response_prefix = lines[:-4]
                starting_lines = lines[-3:-2]
            starting_lines = [f"# {line.lstrip()}" for line in starting_lines]
            response = "".join([*response_prefix, *starting_lines]).strip()
            response += "\n# Solution\n"
    else:
        # hit = True
        assert prompt.count("<code>") == 0
        assert prompt.count("</code>") == 0
        assert prompt.strip().endswith("# SOLUTION START")
        code_prefix = prompt[: prompt.rindex("# SOLUTION START")].strip()
        instruction = f"""Write a solution to the following problem:
```python
{code_prefix}
```"""
        response = f"```python\n{code_prefix}\n# Solution\n"
    instruction = instruction.replace("<code>", "```python").replace("</code>", "```")
    response = response.replace("<code>", "```python").replace("</code>", "```")
    # if hit:
    #     print("[Instruction]")
    #     print(instruction)
    #     print("[Response]")
    #     print(response)
    #     breakpoint()
    return instruction, response
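

# For Insertion mode, the DS-1000 scaffold around the solution slot is replaced
# with a single [insert] marker and the <code> tags are rewritten as markdown
# fences, yielding a prefix/suffix pair suitable for infilling.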
def preprocess_insertion_prompt(prompt: str) -> str:
    pattern = """</code>
BEGIN SOLUTION
<code>
[insert]
</code>
END SOLUTION"""
    pattern_index = prompt.index(pattern)
    # pattern_block = prompt[pattern_index:]
    prefix = prompt[:pattern_index]
    # hit = False
    if pattern + "\n<code>" in prompt:
        index = prompt.index("<code>", pattern_index + len(pattern))
        suffix = prompt[index + len("<code>") :]
    else:
        # hit = True
        assert pattern in prompt
        suffix = ""
    final_prompt = prefix.strip() + "\n[insert]\n" + suffix.strip()
    final_prompt = final_prompt.replace("<code>", "```python").replace("</code>", "```")
    # if hit:
    #     print(final_prompt)
    #     breakpoint()
    return final_prompt


def main():
    args = cast(Args, HfArgumentParser(Args).parse_args_into_dataclasses()[0])
    dataset = DS1000Dataset(args.dataset_path, mode=args.mode)

    global PROMPT
    if (inferred := os.getenv("INFER")) is not None:
        if inferred == "1":
            PROMPT = infer_prompt_template(args.model_name_or_path)
        else:
            PROMPT = infer_prompt_template(inferred)

    print("Using prompt:")
    print(PROMPT)

    # Matplotlib problems are excluded in Insertion mode.
    all_problems = [
        problem
        for problems in dataset.data.values()
        for problem in problems
        if args.mode == "Completion" or problem["lib"] != "Matplotlib"
    ]
    engine = LLM(
        tokenizer=args.model_key, model=args.model_name_or_path or args.model_key
    )
    # model_context = get_model_context(
    #     model_key=args.model_key,
    #     model_name_or_path=args.model_name_or_path,
    # )
    for problem in tqdm(all_problems):
        # generate(args, model_context, problem)
        generate(args, engine, problem)


if __name__ == "__main__":
    main()
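
A minimal invocation sketch (hypothetical paths and model name; HfArgumentParser
derives the flags from the Args fields, and INFER=1 infers the prompt template
from the model):

    INFER=1 python evaluation/ds_1000.py \
        --dataset_path /path/to/ds1000_data \
        --model_key bigcode/starcoder2-15b \
        --model_name_or_path bigcode/starcoder2-15b \
        --mode Completion \
        --output_dir outputs/ds1000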

0 commit comments