[go: up one dir, main page]

0% found this document useful (0 votes)
5 views5 pages

Version 2 of Employee_contract

The document is a Python script that automates the generation of employee agreements using data from an Excel file and a Word template. It includes functions for cleaning data, creating employee-specific directories, generating DOCX files, converting them to PDF, and handling user input for selecting which employee data to process. The script ensures that all required data is populated before generating the agreements and provides error handling for file operations.
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
5 views5 pages

Version 2 of Employee_contract

The document is a Python script that automates the generation of employee agreements using data from an Excel file and a Word template. It includes functions for cleaning data, creating employee-specific directories, generating DOCX files, converting them to PDF, and handling user input for selecting which employee data to process. The script ensures that all required data is populated before generating the agreements and provides error handling for file operations.
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 5

import os

import pandas as pd
from docxtpl import DocxTemplate
import re
from datetime import datetime
import subprocess
import glob

# File paths - modify these to match your file names in /content


EXCEL_PATH = "/content/Employee_Data (Employee Agreement).xlsx"
TEMPLATE_PATH = "/content/Utsuk_Employee agreement.docx"
BASE_OUTPUT_DIR = "/content/drive/MyDrive/Employee_Contracts/Generated_Contracts"

# Constants remain the same


COLUMN_TO_PLACEHOLDER = {
'Date': 'date',
'Month': 'month',
'Year': 'year',
'Full Name': 'employee_name',
'House Address': 'house_address',
'PAN': 'PAN_card_number',
'Effective Date': 'effective_date',
'Effective Month': 'effective_month',
'Effective Year': 'effective_year',
'Designation': 'designation',
'Annual Pay (No.)': 'annual_pay_in_numbers',
'Annual Pay (words)': 'annual_pay_in_words',
'Basic (Monthly)': 'Basic_monthly',
'Basic (Annual)': 'Basic',
'HRA (Monthly)': 'HRA_monthly',
'HRA (Annual)': 'HRA',
'Bonus (Monthly)': 'Bonus_monthly',
'Bonus (Annual)': 'Bonus',
'Special Allowance (Monthly)': 'SA_monthly',
'Special Allowance (Annual)': 'SA',
'PF (Monthly)': 'PF_monthly',
'PF (Annual)': 'PF',
'CTC (Monthly)': 'CTC_monthly',
'CTC (Annual)': 'CTC',
'Personal Email': 'employee_personal_email_id'
}

def get_safe_employee_name(full_name):
"""Convert employee name to a safe folder name"""
return re.sub(r'[^\w\s-]', '', full_name).strip().replace(' ', '_')

def clean_employee_folder(folder_path):
"""Remove old DOCX and PDF files from the employee folder"""
for ext in ['*.docx', '*.pdf']:
for file in glob.glob(os.path.join(folder_path, ext)):
try:
os.remove(file)
print(f"Removed old file: {os.path.basename(file)}")
except Exception as e:
print(f"Error removing old file {file}: {str(e)}")

def setup_employee_folder(employee_name):
"""Create or clean employee-specific folder and return its path"""
safe_name = get_safe_employee_name(employee_name)
employee_dir = os.path.join(BASE_OUTPUT_DIR, safe_name)

if os.path.exists(employee_dir):
print(f"\nExisting folder found for {employee_name}")
print("Cleaning old files...")
clean_employee_folder(employee_dir)
else:
print(f"\nCreating new folder for {employee_name}")
os.makedirs(employee_dir)

return employee_dir

def docx_to_pdf(docx_path, pdf_path):


"""Convert DOCX to PDF using unoconv"""
try:
subprocess.run(['unoconv', '-f', 'pdf', '-o', pdf_path, docx_path],
check=True)
print(f"Converted {docx_path} to {pdf_path}")
except subprocess.CalledProcessError as e:
print(f"Error converting to PDF: {e}")
raise

def check_files_exist():
"""Check if required files exist"""
if not os.path.exists(EXCEL_PATH):
raise FileNotFoundError(f"Excel file not found at {EXCEL_PATH}")
if not os.path.exists(TEMPLATE_PATH):
raise FileNotFoundError(f"Template file not found at {TEMPLATE_PATH}")

def clean_currency(value):
"""Clean currency values and convert to string"""
if pd.isna(value):
return ""
if isinstance(value, (int, float)):
return f"{int(value):,}"
if isinstance(value, str):
numeric_value = float(re.sub(r'[^\d.]', '', value))
return f"{int(numeric_value):,}"
return str(value).strip()

def clean_numeric(value):
"""Clean numeric values to ensure they're integers"""
if pd.isna(value):
return ""
if isinstance(value, (int, float)):
return str(int(value))
return str(value).strip()

def clean_data(value, is_currency=False, is_numeric=False):


"""Clean and sanitize data"""
if is_currency:
return clean_currency(value)
if is_numeric:
return clean_numeric(value)
if pd.isna(value):
return ""
return str(value).strip()

def get_fully_populated_rows(df):
"""Return indices of rows that have all required columns populated"""
required_columns = list(COLUMN_TO_PLACEHOLDER.keys())
fully_populated = df.dropna(subset=required_columns)
return [idx + 2 for idx in fully_populated.index.tolist()]

def get_user_row_selection(populated_rows):
"""Get user input for row selection"""
while True:
print(f"\nAvailable Excel row numbers with complete data:
{populated_rows}")
print("Enter Excel row numbers to process. You can:")
print("1. Enter a single row number")
print("2. Enter a range (e.g., '2-5')")
print("3. Enter multiple rows separated by commas (e.g., '2,3,5')")

user_input = input("\nEnter your selection: ").strip()

try:
selected_indices = set()
for part in user_input.split(','):
part = part.strip()
if '-' in part:
start, end = map(int, part.split('-'))
if start not in populated_rows or end not in populated_rows:
raise ValueError(f"Row {start} or {end} is not available")
selected_indices.update([i for i in range(start, end + 1) if i
in populated_rows])
else:
row_num = int(part)
if row_num not in populated_rows:
raise ValueError(f"Row {row_num} is not available")
selected_indices.add(row_num)

return sorted([row_num - 2 for row_num in selected_indices])

except ValueError as e:
print(f"Invalid input: {e}. Please try again.")

def create_context(employee_data):
"""Create template context from employee data"""
context = {}
currency_columns = [
'Annual Pay (No.)', 'Basic (Monthly)', 'Basic (Annual)',
'HRA (Monthly)', 'HRA (Annual)', 'Bonus (Monthly)', 'Bonus (Annual)',
'Special Allowance (Monthly)', 'Special Allowance (Annual)',
'PF (Monthly)', 'PF (Annual)', 'CTC (Monthly)', 'CTC (Annual)'
]
numeric_columns = ['Date', 'Effective Date', 'Year', 'Effective Year']

for excel_col, template_key in COLUMN_TO_PLACEHOLDER.items():


if excel_col in employee_data:
is_currency = excel_col in currency_columns
is_numeric = excel_col in numeric_columns
context[template_key] = clean_data(employee_data[excel_col],
is_currency, is_numeric)
return context

def generate_filename(timestamp):
"""Generate a filename with timestamp"""
return f"Employment_Agreement_{timestamp}"

def process_employees(df, selected_indices):


"""Process employee data and generate documents"""
try:
generated_files = []
for index in selected_indices:
try:
employee_data = df.iloc[index].to_dict()
context = create_context(employee_data)

# Get employee name and setup/clean folder


employee_name = employee_data['Full Name']
employee_dir = setup_employee_folder(employee_name)

# Generate filenames (without timestamp for consistency)


base_filename = "Employment_Agreement"
docx_path = os.path.join(employee_dir, f"{base_filename}.docx")
pdf_path = os.path.join(employee_dir, f"{base_filename}.pdf")

# Generate and save DOCX


doc = DocxTemplate(TEMPLATE_PATH)
doc.render(context)
doc.save(docx_path)
print(f"Created new DOCX for {employee_name}")

# Convert to PDF
try:
docx_to_pdf(docx_path, pdf_path)
generated_files.append((employee_name, docx_path, pdf_path))
print(f"Created new PDF for {employee_name}")
except Exception as pdf_error:
print(f"Error converting to PDF for {employee_name}:
{str(pdf_error)}")
generated_files.append((employee_name, docx_path, None))
print(f"Only DOCX was saved for {employee_name}")

except Exception as e:
print(f"Error processing Excel row {index + 2}: {str(e)}")

return generated_files
except Exception as e:
print(f"Error in process_employees: {str(e)}")
return []

def main():
try:
# Create base output directory
os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)

# Check if files exist


check_files_exist()

# Load Excel data


print("Loading Excel data...")
df = pd.read_excel(EXCEL_PATH)

# Get fully populated rows


populated_rows = get_fully_populated_rows(df)
if not populated_rows:
print("No fully populated rows found in the Excel file.")
return

# Get user selection for rows


selected_indices = get_user_row_selection(populated_rows)

# Process selected employees


print(f"\nProcessing selected rows...")
generated_files = process_employees(df, selected_indices)

if generated_files:
print("\nGenerated files summary:")
for employee_name, docx_path, pdf_path in generated_files:
print(f"\nEmployee: {employee_name}")
print(f"Location: {os.path.dirname(docx_path)}")
print(f"Files generated:")
print(f"- {os.path.basename(docx_path)}")
if pdf_path:
print(f"- {os.path.basename(pdf_path)}")
else:
print("- PDF conversion failed")

print("\nProcess completed!")

except Exception as e:
print(f"An error occurred: {str(e)}")

if __name__ == "__main__":
main()

You might also like