#!/bin/bash
#
# Launch one star_align.self_ossinstruct worker per GPU, in parallel, and
# split MAX_NEW_DATA items across the workers as contiguous, non-overlapping
# index ranges.
#
# Usage:
#   MODE=... SEED_DATA_FILE=... INDEX=... MAX_NEW_DATA=... <script> OUTPUT_DIR
#
# Environment:
#   MODE            Forwarded as --instruct_mode. "I->R" selects the 1-shot
#                   settings below; any other value selects the 8-shot ones.
#   SEED_DATA_FILE  Forwarded as --seed_data_files.
#   INDEX           Start index of the first worker's range (defaults to 0).
#   MAX_NEW_DATA    Total number of items, summed over all workers.
#
# Arguments:
#   $1  Output directory; worker N saves into "$1/N".
#
# Exit status:
#   0 when every worker succeeds, 1 on invalid input or when any worker
#   fails, 130/143 when interrupted by SIGINT/SIGTERM.

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Abort unless $2 is a non-negative decimal integer; $1 names it in the error.
require_uint() {
  [[ "$2" =~ ^[0-9]+$ ]] || die "$1 must be a non-negative integer, got '$2'"
}

echo "MODE: $MODE"
echo "SEED_DATA_FILE: $SEED_DATA_FILE"
echo "INDEX: $INDEX"
echo "MAX_NEW_DATA: $MAX_NEW_DATA"
echo "DIR: $1"

[[ -n "$1" ]] || die "usage: ${0##*/} OUTPUT_DIR (output directory is required)"
[[ -n "$SEED_DATA_FILE" ]] || die "SEED_DATA_FILE must be set"

# An unset INDEX used to evaluate to 0 inside the arithmetic; keep that.
INDEX=${INDEX:-0}
require_uint INDEX "$INDEX"
require_uint MAX_NEW_DATA "$MAX_NEW_DATA"

# nvidia-smi prints the GPU count once per GPU; the first line is enough.
NUM_GPUS=$(nvidia-smi --query-gpu=count --format=csv,noheader,nounits | head -n 1)
NUM_GPUS=${NUM_GPUS//[[:space:]]/}
[[ "$NUM_GPUS" =~ ^[0-9]+$ ]] || die "could not determine the GPU count from nvidia-smi"

# Force base 10 so values with leading zeros are not parsed as octal.
INDEX=$((10#$INDEX))
MAX_NEW_DATA=$((10#$MAX_NEW_DATA))
NUM_GPUS=$((10#$NUM_GPUS))
# Guard the divisions below against a zero divisor.
((NUM_GPUS > 0)) || die "nvidia-smi reported no GPUs"

# Every worker gets DATA_CHUNK_SIZE items; the first REMAINDER workers get
# one extra so that the chunk sizes add up to exactly MAX_NEW_DATA.
DATA_CHUNK_SIZE=$((MAX_NEW_DATA / NUM_GPUS))
REMAINDER=$((MAX_NEW_DATA % NUM_GPUS))

if [[ "$MODE" == "I->R" ]]; then
  N_SAMPLES=1
  NUM_FEWSHOTS=1
  NUM_BATCHED_REQUESTS=4096
  ASYNC_MICRO_BATCH_SIZE=16
else
  N_SAMPLES=1
  NUM_FEWSHOTS=8
  NUM_BATCHED_REQUESTS=4096
  ASYNC_MICRO_BATCH_SIZE=8
fi

echo "N_SAMPLES: $N_SAMPLES"
echo "NUM_FEWSHOTS: $NUM_FEWSHOTS"
echo "NUM_BATCHED_REQUESTS: $NUM_BATCHED_REQUESTS"
echo "ASYNC_MICRO_BATCH_SIZE: $ASYNC_MICRO_BATCH_SIZE"

PIDS=()

# Terminate every worker that is still tracked in PIDS.
killall_pids() {
  local pid
  for pid in "${PIDS[@]}"; do
    # Errors are silenced on purpose: a worker may already have exited.
    kill "$pid" 2>/dev/null
  done
}
# Clean up on every exit path. The signal traps only call exit, which in turn
# fires the EXIT trap, so the workers are signalled exactly once.
trap killall_pids EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

for ((GPU_ID = 0; GPU_ID < NUM_GPUS; GPU_ID++)); do
  if ((GPU_ID < REMAINDER)); then
    CHUNK_SIZE=$((DATA_CHUNK_SIZE + 1))
    EXTRA_BEFORE=$GPU_ID
  else
    CHUNK_SIZE=$DATA_CHUNK_SIZE
    EXTRA_BEFORE=$REMAINDER
  fi
  # Shift the start by the extra items already handed to lower-numbered
  # workers; without this the ranges overlap and the tail is never processed.
  START_INDEX=$((INDEX + GPU_ID * DATA_CHUNK_SIZE + EXTRA_BEFORE))
  END_INDEX=$((START_INDEX + CHUNK_SIZE - 1))

  echo "Starting process for GPU $GPU_ID with data from $START_INDEX to $END_INDEX..."

  OUTDIR="$1/$GPU_ID"
  mkdir -p -- "$OUTDIR" || die "cannot create output directory '$OUTDIR'"

  # SEED_DATA_FILE is left unquoted on purpose: the flag name is plural, so
  # the variable presumably may list several files (TODO confirm).
  # shellcheck disable=SC2086
  CUDA_VISIBLE_DEVICES="$GPU_ID" python -m star_align.self_ossinstruct \
    --async_micro_batch_size "$ASYNC_MICRO_BATCH_SIZE" \
    --use_vllm_server False \
    --instruct_mode "$MODE" \
    --seed_data_files $SEED_DATA_FILE \
    --max_new_data "$CHUNK_SIZE" \
    --tag "sc2-${NUM_FEWSHOTS}shot" \
    --temperature 0.7 \
    --seed_code_start_index "$START_INDEX" \
    --model bigcode/starcoder2-15b \
    --num_fewshots "$NUM_FEWSHOTS" \
    --num_batched_requests "$NUM_BATCHED_REQUESTS" \
    --num_sample_per_request "$N_SAMPLES" \
    --save_dir "$OUTDIR" &
  PIDS+=("$!")
done

# Wait for each worker individually: a bare `wait` returns 0 even when a
# background job failed, which would hide worker errors from the caller.
FAILED=0
for pid in "${PIDS[@]}"; do
  if ! wait "$pid"; then
    echo "Worker with PID $pid exited with an error" >&2
    FAILED=1
  fi
done

# All workers have been reaped; forget their PIDs so the EXIT trap cannot
# signal an unrelated process that happens to reuse one of them.
PIDS=()
exit "$FAILED"
0 commit comments