Fix mapping of PostgreSQL encodings to Python encodings.

hlinnaka · hlinnaka · commit 138313ebaa98 · 2012-07-05T22:32:04.000+03:00
The Windows encodings ("win1252" and so forth) are named differently in
Python ("cp1252" and so forth). Also, if the PyUnicode_AsEncodedString()
call fails for some reason, use a plain ereport(), not PLy_elog(), to report
that error. That avoids recursion and a crash if PLy_elog() tries to call
PLyUnicode_Bytes() again.

This fixes a bug reported by Asif Naeem. Backpatch down to 9.0; before that,
plpython didn't even try these conversions.

Jan Urbański, with minor comment improvements by me.
diff --git a/src/pl/plpython/plpython.c b/src/pl/plpython/plpython.c
@@ -4873,16 +4873,71 @@ PLyUnicode_Bytes(PyObject *unicode)
 	const char *serverenc;
 
 	/*
-	 * Python understands almost all PostgreSQL encoding names, but it doesn't
-	 * know SQL_ASCII.
+	 * Map PostgreSQL encoding to a Python encoding name.
 	 */
-	if (GetDatabaseEncoding() == PG_SQL_ASCII)
-		serverenc = "ascii";
-	else
-		serverenc = GetDatabaseEncodingName();
+	switch (GetDatabaseEncoding())
+	{
+		case PG_SQL_ASCII:
+			/*
+			 * Mapping SQL_ASCII to Python's 'ascii' is a bit bogus. Python's
+			 * 'ascii' means true 7-bit only ASCII, while PostgreSQL's
+			 * SQL_ASCII means that anything is allowed, and the system doesn't
+			 * try to interpret the bytes in any way. But not sure what else
+			 * to do, and we haven't heard any complaints...
+			 */
+			serverenc = "ascii";
+			break;
+		case PG_WIN1250:
+			serverenc = "cp1250";
+			break;
+		case PG_WIN1251:
+			serverenc = "cp1251";
+			break;
+		case PG_WIN1252:
+			serverenc = "cp1252";
+			break;
+		case PG_WIN1253:
+			serverenc = "cp1253";
+			break;
+		case PG_WIN1254:
+			serverenc = "cp1254";
+			break;
+		case PG_WIN1255:
+			serverenc = "cp1255";
+			break;
+		case PG_WIN1256:
+			serverenc = "cp1256";
+			break;
+		case PG_WIN1257:
+			serverenc = "cp1257";
+			break;
+		case PG_WIN1258:
+			serverenc = "cp1258";
+			break;
+		case PG_WIN866:
+			serverenc = "cp866";
+			break;
+		case PG_WIN874:
+			serverenc = "cp874";
+			break;
+		default:
+			/* Other encodings have the same name in Python. */
+			serverenc = GetDatabaseEncodingName();
+			break;
+	}
+
 	rv = PyUnicode_AsEncodedString(unicode, serverenc, "strict");
 	if (rv == NULL)
-		PLy_elog(ERROR, "could not convert Python Unicode object to PostgreSQL server encoding");
+	{
+		/*
+		 * Use a plain ereport instead of PLy_elog to avoid recursion, if
+		 * the traceback formatting functions try to do unicode to bytes
+		 * conversion again.
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("could not convert Python Unicode object to PostgreSQL server encoding")));
+	}
 	return rv;
 }