Created code for training the model not to hallucinate. Added multiple hallucinate scripts for all agents.
CodCodingCode · CodCodingCode · Jun 3, 2025 · Jun 7, 2025 · Jun 7, 2025 · Jun 7, 2025
commit c2564aa3b8b11940ef7dc5e1e7a2beeea9477f45
diff --git a/clinical_conversation_log.json b/clinical_conversation_log.json
@@ -1,5 +1,4 @@
 {
-  "timestamp": "2025-06-12T12:06:31.688894",
-  "vignette": "A 58-year-old woman presents to the clinic with a 3-day history of progressively worsening lower left abdominal pain, described as sharp and constant. She reports associated low-grade fever, nausea, and decreased appetite but denies any vomiting. Her bowel habits have changed recently, alternating between constipation and occasional loose stools. She mentions a history of a low-fiber diet and chronic constipation over several years. On examination, tenderness localized to the left lower quadrant is noted without rebound or guarding. Laboratory evaluation reveals mild leukocytosis, and a CT scan of the abdomen confirms the presence of inflamed diverticula in the sigmoid colon consistent with acute diverticulitis. She denies any rectal bleeding or significant family history of colon disease. The patient is advised to start on a high-fiber diet once acute inflammation resolves and is prescribed antibiotics to manage the infection.",
+  "timestamp": "2025-06-12T13:15:22.176083",
   "iterations": []
 }
diff --git a/new_data_gen/SFT /endpoint_test.py b/new_data_gen/SFT /endpoint_test.py
@@ -7,7 +7,7 @@
 # REPLACE THESE WITH YOUR ACTUAL VALUES
 # ============================================================================
 ENDPOINT_URL = "url"  # Your endpoint URL from the screenshot
-HF_TOKEN = "api"  # Your HuggingFace token
+HF_TOKEN = "token"  # Your HuggingFace token
 
 
 class HuggingFaceInference:

diff --git a/new_data_gen/SFT /patient.py b/new_data_gen/SFT /patient.py
@@ -18,9 +18,9 @@
 def format_fn(example):
     return {
         "text": (
-            f"### Instruction:\nYou are a patient agent. Please act as if you are a real patient with the following vignette and conversation.\n\n"
-            f"### Input:\n{example['input']}\n\n"
-            f"### Output:\n{example['output']}"
+            f"instruction:\nYou are a patient agent. Please act as if you are a real patient with the following vignette and conversation.\n\n"
+            f"input:\n{example['input']}\n\n"
+            f"output:\n{example['output']}"
         )
     }
 

diff --git a/new_data_gen/grpo_infra/Full_loop/endpoint.py b/new_data_gen/grpo_infra/Full_loop/endpoint.py
@@ -9,7 +9,10 @@
 # ============================================================================
 # REPLACE THESE WITH YOUR ACTUAL VALUES
 # ============================================================================
-  # Your JSON file with the vignettes
+DOCTOR_ENDPOINT_URL = "https://pawaab86ddo00b8m.us-east-1.aws.endpoints.huggingface.cloud"  # Your clinical/doctor model endpoint
+PATIENT_ENDPOINT_URL = "https://b81l3ho0k7z27sip.us-east-1.aws.endpoints.huggingface.cloud"  # Your patient agent model endpoint
+HF_TOKEN = "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"  # Your HuggingFace token
+VIGNETTES_FILE = "new_data_gen/actual_data_gen/disease_vignettes_from_familydoctor.json"  # Your JSON file with the vignettes
 
 
 class HuggingFaceInference:
@@ -64,9 +67,9 @@ def generate(self, prompt, max_new_tokens=800):
 def load_vignettes(filename):
     """Load vignettes from JSON file and select 2 per condition"""
     try:
-        with open(filename, 'r') as f:
+        with open(filename, "r") as f:
             data = json.load(f)
-        
+
         selected_vignettes = {}
         for condition, vignettes in data.items():
             cleaned_vignettes = []
@@ -78,8 +81,10 @@ def load_vignettes(filename):
                     cleaned_vignettes.append(str(vignette))
             # Take only first 2 vignettes per condition
             selected_vignettes[condition] = cleaned_vignettes[:2]
-            print(f"📋 Loaded {len(selected_vignettes[condition])} vignettes for {condition}")
-
+            print(
+                f"📋 Loaded {len(selected_vignettes[condition])} vignettes for {condition}"
+            )
+
         return selected_vignettes
     except FileNotFoundError:
         print(f"❌ Vignettes file {filename} not found!")
@@ -88,23 +93,23 @@ def load_vignettes(filename):
         print(f"❌ Invalid JSON in {filename}")
         return {}
 
+
 def get_random_vignette():
     """Get a random vignette from the loaded vignettes"""
     vignettes_data = load_vignettes(VIGNETTES_FILE)
     if not vignettes_data:
         return "A patient presents with concerning symptoms.", "Unknown Condition"
-    
+
     # Get all vignettes from all conditions with their condition names
     all_vignettes_with_conditions = []
     for condition, vignettes in vignettes_data.items():
         for vignette in vignettes:
             all_vignettes_with_conditions.append((vignette, condition))
-     
+
     selected_vignette, condition = random.choice(all_vignettes_with_conditions)
     return selected_vignette, condition
 
 
-
 # JSON logging functionality
 def save_to_json(data, filename="clinical_conversation_log.json"):
     """Save data to JSON file"""
@@ -187,7 +192,7 @@ def run_conversation():
     convo = []
     prev_questions = []
     convo.append("Doctor: What brings you in today?")
-    patient_input = f"### Instruction:\nYou are a patient agent. Please act as if you are a real patient with the following vignette and conversation.\n\n### Input:\n{vignette}\n\n{question}\n\n### Output:"  # MODIFIED THIS LINE
+    patient_input = f"Instruction:\nYou are a patient agent. Please act as if you are a real patient with the following vignette and conversation.\n\n Input:\n{vignette}\n\n{question}\n\n Output: THINKING:"  # MODIFIED THIS LINE
     patient_response = patient_model.generate(
         patient_input, max_new_tokens=700
     )  # Generate initial patient response
@@ -282,6 +287,7 @@ def run_conversation():
             "thinking": thinking3,
             "answer": answer3,
         }
+        print(answer3)
 
         doctor_output = answer3 if answer3 else raw_output
         print("❓ Question Generation Output:")
@@ -313,7 +319,7 @@ def run_conversation():
         save_conversation_state(conversation_log)
         print(f"💾 Progress saved to clinical_conversation_log.json")
 
-        patient_input = f"### Instruction:\nYou are a patient agent. Please act as if you are a real patient with the following vignette and conversation.\n\n### Input:\n{vignette}\n\nCONVERSATION HISTORY:\n{chr(10).join(convo[-4:])}\n\nDOCTOR'S QUESTION: {doctor_output}\n\n### Output:"  # MODIFIED THIS LINE
+        patient_input = f"instruction:\nYou are a patient agent. Please act as if you are a real patient with the following vignette and conversation.\n\n input:\n{vignette} {doctor_output} output: THINKING"  # MODIFIED THIS LINE
         patient_response = patient_model.generate(patient_input, max_new_tokens=700)
         print("👤 Patient Response:", patient_response)
         # Clean patient response
@@ -322,6 +328,7 @@ def run_conversation():
                 -1
             ].strip()  # ADD THIS LINE
         convo.append(f"Patient: {patient_response}")
+        print("Patient Response:", patient_response)
         prev_vignette = output
         question = doctor_output  # Update question for next iteration
 

diff --git a/new_data_gen/grpo_infra/Full_loop/tempCodeRunnerFile.py b/new_data_gen/grpo_infra/Full_loop/tempCodeRunnerFile.py
@@ -1,2 +1 @@
-
-        print("👤 Patient Response:", patient_response)
+"
Original file line number	Diff line number	Diff line change
		@@ -1,2 +1 @@

		print("👤 Patient Response:", patient_response)
		"