From faeb8e11361740964e276b48a9b41ce1e0f0c21b Mon Sep 17 00:00:00 2001 From: Dave Hylands Date: Sun, 24 Jan 2016 01:28:55 -0800 Subject: [PATCH 1/3] py: Add C API for reading from file or file-like objects --- py/mpconfig.h | 5 +++ py/mpfile.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++ py/mpfile.h | 55 +++++++++++++++++++++++ py/py.mk | 1 + py/qstrdefs.h | 2 + 5 files changed, 182 insertions(+) create mode 100644 py/mpfile.c create mode 100644 py/mpfile.h diff --git a/py/mpconfig.h b/py/mpconfig.h index 402e97acc2069..ce049291b0d50 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -696,6 +696,11 @@ typedef double mp_float_t; #define MICROPY_PY_GC_COLLECT_RETVAL (0) #endif +// Whether to provide "file" or "file-like" API support +#ifndef MICROPY_PY_FILE_LIKE +#define MICROPY_PY_FILE_LIKE (0) +#endif + // Whether to provide "io" module #ifndef MICROPY_PY_IO #define MICROPY_PY_IO (1) diff --git a/py/mpfile.c b/py/mpfile.c new file mode 100644 index 0000000000000..776967ecc3b0d --- /dev/null +++ b/py/mpfile.c @@ -0,0 +1,119 @@ +/* + * This file is part of the Micro Python project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2016 Dave Hylands + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "py/mpfile.h" + +#include "py/builtin.h" +#include "py/misc.h" +#include "py/runtime.h" + +#include + +#if MICROPY_PY_FILE_LIKE + +STATIC const mp_obj_type_t mp_file_type; +STATIC mp_obj_t mp___del__(mp_obj_t self); + +mp_file_t *mp_file_from_file_obj(mp_obj_t file_obj) { + mp_file_t *file = m_new_obj_with_finaliser(mp_file_t); + memset(file, 0, sizeof(*file)); + file->base.type = &mp_file_type; + file->file_obj = file_obj; + file->readinto_fn = mp_const_none; + file->seek_fn = mp_const_none; + file->tell_fn = mp_const_none; + + return file; +} + +mp_file_t *mp_open(const char *filename, const char *mode) { + mp_obj_t filename_obj = mp_obj_new_str(filename, strlen(filename), false); + mp_obj_t mode_obj = mp_obj_new_str(mode, strlen(mode), true); + mp_obj_t args[2] = { filename_obj, mode_obj }; + return mp_file_from_file_obj(mp_builtin_open(2, args, (mp_map_t *)&mp_const_empty_map)); +} + +mp_int_t mp_readinto(mp_file_t *file, void *buf, size_t num_bytes) { + if (file->readinto_fn == mp_const_none) { + file->readinto_fn = mp_load_attr(file->file_obj, MP_QSTR_readinto); + } + mp_obj_t bytearray = mp_obj_new_bytearray_by_ref(num_bytes, buf); + mp_obj_t bytes_read = mp_call_function_1(file->readinto_fn, bytearray); + if (bytes_read == mp_const_none) { + return 0; + } + return mp_obj_get_int(bytes_read); +} + +off_t mp_seek(mp_file_t *file, off_t offset, int whence) { + if (file->seek_fn == mp_const_none) { + file->seek_fn = mp_load_attr(file->file_obj, MP_QSTR_seek); + 
} + return mp_obj_get_int(mp_call_function_2(file->seek_fn, + mp_obj_new_int(offset), // offset may not fit in a small int + MP_OBJ_NEW_SMALL_INT(whence))); +} + +off_t mp_tell(mp_file_t *file) { + if (file->tell_fn == mp_const_none) { + file->tell_fn = mp_load_attr(file->file_obj, MP_QSTR_tell); + } + return mp_obj_get_int(mp_call_function_0(file->tell_fn)); +} + +void mp_close(mp_file_t *file) { + mp_obj_t file_obj = file->file_obj; // mp_const_none once the file has been closed + file->file_obj = mp_const_none; + file->readinto_fn = mp_const_none; + file->seek_fn = mp_const_none; + file->tell_fn = mp_const_none; + if (file_obj != mp_const_none) { mp_call_function_0(mp_load_attr(file_obj, MP_QSTR_close)); } // a repeated close (e.g. from __del__) is a no-op +} + +STATIC void mp_file_print(const mp_print_t *print, mp_obj_t self, mp_print_kind_t kind) { + (void)kind; + mp_printf(print, "", self); +} + +STATIC mp_obj_t mp___del__(mp_obj_t self) { + mp_close(MP_OBJ_TO_PTR(self)); + return mp_const_none; +} +STATIC MP_DEFINE_CONST_FUN_OBJ_1(mp___del___obj, mp___del__); + +STATIC const mp_rom_map_elem_t mp_file_locals_dict_table[] = { + { MP_ROM_QSTR(MP_QSTR___del__), MP_ROM_PTR(&mp___del___obj) }, +}; +STATIC MP_DEFINE_CONST_DICT(mp_file_locals_dict, mp_file_locals_dict_table); + +STATIC const mp_obj_type_t mp_file_type = { + .base = { &mp_type_type }, + .name = MP_QSTR_mp_file, + .print = mp_file_print, + .locals_dict = (mp_obj_dict_t *)&mp_file_locals_dict, +}; + +#endif // MICROPY_PY_FILE_LIKE diff --git a/py/mpfile.h b/py/mpfile.h new file mode 100644 index 0000000000000..02453647adc0b --- /dev/null +++ b/py/mpfile.h @@ -0,0 +1,55 @@ +/* + * This file is part of the Micro Python project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2016 Dave Hylands + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the 
Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef __MICROPY_INCLUDED_PY_MPFILE_H__ +#define __MICROPY_INCLUDED_PY_MPFILE_H__ + +#include "py/obj.h" +#include // for off_t + +// A C API for performing I/O on files or file-like objects. + +typedef struct { + mp_obj_base_t base; + mp_obj_t file_obj; + mp_obj_t readinto_fn; + mp_obj_t seek_fn; + mp_obj_t tell_fn; +} mp_file_t; + +#define MP_SEEK_SET 0 +#define MP_SEEK_CUR 1 +#define MP_SEEK_END 2 + +mp_file_t *mp_file_from_file_obj(mp_obj_t file_obj); +mp_file_t *mp_open(const char *filename, const char *mode); +mp_int_t mp_readinto(mp_file_t *file, void *buf, size_t num_bytes); +off_t mp_seek(mp_file_t *file, off_t offset, int whence); +off_t mp_tell(mp_file_t *file); +void mp_close(mp_file_t *file); + + +#endif // __MICROPY_INCLUDED_PY_MPFILE_H__ diff --git a/py/py.mk b/py/py.mk index 63770e2d24530..fcde3f7ef5f55 100644 --- a/py/py.mk +++ b/py/py.mk @@ -99,6 +99,7 @@ PY_O_BASENAME = \ argcheck.o \ warning.o \ map.o \ + mpfile.o \ obj.o \ objarray.o \ objattrtuple.o \ diff --git a/py/qstrdefs.h b/py/qstrdefs.h index bf774e036b6bd..5a5bc6fb33dd6 100644 --- a/py/qstrdefs.h +++ b/py/qstrdefs.h @@ -266,6 +266,8 @@ Q(value) Q(write) Q(zip) +Q(mp_file) + #if MICROPY_PY_BUILTINS_COMPILE Q(compile) Q(code) From 
88cf919b7d73ffa3b87191338843511560211af3 Mon Sep 17 00:00:00 2001 From: Dave Hylands Date: Thu, 21 Jan 2016 17:46:26 -0800 Subject: [PATCH 2/3] extmod: Add uzipfile This supports decompressing stored files, and if MICROPY_PY_UZLIB is enabled then DEFLATED files (the default compression that zip uses) can be decompressed. --- extmod/moduzipfile.c | 667 +++++++++++++++++++++++++++++++++++++++ extmod/moduzipfile.h | 40 +++ py/builtin.h | 1 + py/mpconfig.h | 4 + py/objmodule.c | 3 + py/py.mk | 1 + py/qstrdefs.h | 11 + tests/extmod/zipfile1.py | 121 +++++++ unix/mpconfigport.h | 2 + 9 files changed, 850 insertions(+) create mode 100644 extmod/moduzipfile.c create mode 100644 extmod/moduzipfile.h create mode 100644 tests/extmod/zipfile1.py diff --git a/extmod/moduzipfile.c b/extmod/moduzipfile.c new file mode 100644 index 0000000000000..0c8634615eef1 --- /dev/null +++ b/extmod/moduzipfile.c @@ -0,0 +1,667 @@ +/* + * This file is part of the Micro Python project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2016 Dave Hylands + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "extmod/moduzipfile.h" + +#include +#include +#include + +#include "py/builtin.h" +#include "py/mpfile.h" +#include "py/nlr.h" +#include "py/obj.h" +#include "py/runtime.h" +#include "py/stream.h" + +#if MICROPY_PY_UZLIB +#include "uzlib/tinf.h" +#endif + +#if MICROPY_PY_UZIPFILE || MICROPY_PY_ZIPIMPORT + +#if !MICROPY_PY_FILE_LIKE +#error MICROPY PY_UZIPFILE or MICRPPY PY_ZIPIMPORT needs MICROPY PY_FILE_LIKE to be set +#endif + +#pragma pack(push, 1) + +#if MP_ENDIANNESS_LITTLE +// Zip files are specified as little endian +#define ZF_UINT16(x) (x) +#define ZF_UINT32(x) (x) +#else +#define ZF_UINT16(x) ((((x) & 0x00ff) << 8) | (((x) & 0xff00) >> 8)) +#define ZF_UINT32(x) ((((x) & 0x000000ff) << 24) | (((x) & 0x0000ff00) << 8) | (((x) & 0x00ff0000) >> 8) | (((x) & 0xff000000) >> 24)) +#endif + +#define ZIP_FILE_HEADER_SIGNATURE "PK\x03\x04" +typedef struct +{ + uint32_t signature; + uint16_t version; + uint16_t flags; + uint16_t compression_method; + uint16_t last_mod_time; + uint16_t last_mod_date; + uint32_t crc32; + uint32_t compressed_size; + uint32_t uncompressed_size; + uint16_t filename_len; + uint16_t extra_len; + + /* char filename[filename_len] */ + /* uint8_t extra[extra_len] */ + +} ZIP_FILE_HDR; + +#define ZIP_CENTRAL_DIRECTORY_SIGNATURE "PK\x01\x02" +typedef struct +{ + uint32_t signature; + uint16_t version_made_by; + uint16_t version_read_with; + uint16_t flags; + uint16_t compression_method; + uint16_t last_mod_time; + uint16_t last_mod_date; + uint32_t crc32; + uint32_t compressed_size; + uint32_t uncompressed_size; + uint16_t filename_len; + uint16_t extra_len; + uint16_t file_comment_len; + uint16_t disk_num; + uint16_t internal_file_attributes; + uint32_t 
external_file_attributes; + uint32_t file_header_offset; + + /* char filename[filename_len] */ + /* uint8_t extra[extra_len] */ + +} ZIP_CENTRAL_DIRECTORY_HDR; + +#define ZIP_END_OF_CENTRAL_DIRECTORY_SIGNATURE "PK\x05\x06" +typedef struct +{ + uint32_t signature; + uint16_t disk_num; + uint16_t central_directory_disk; + uint16_t num_central_directories_this_disk; + uint16_t total_central_directories; + uint32_t central_directory_size; + uint32_t central_directory_offset; + uint16_t comment_len; + + /* char comment[comment_len] */ + +} ZIP_END_OF_CENTRAL_DIRECTORY; + +#pragma pack(pop) + +typedef struct { + uint32_t file_size; + uint16_t last_mod_date; + uint16_t last_mod_time; + uint8_t is_dir; + +} ZIP_FILE_INFO; + +#define ZIP_STORED 0 +#define ZIP_DEFLATED 8 + +typedef struct { + mp_obj_base_t base; + const char *filename; + mp_file_t *file; + off_t end_directory_posn; + off_t directory_posn; + off_t first_file_posn; +} ZipFile_t; + +typedef struct { + mp_obj_base_t base; + ZipFile_t *zf; + mp_obj_t filename; + off_t seek_offset; + off_t data_posn; + uint32_t compressed_size; + uint32_t uncompressed_size; + uint8_t *uncompressed_data; +} ZipExtFile_t; + +STATIC const mp_obj_type_t ZipFile_type; +STATIC const mp_obj_type_t ZipExtFile_type; + +STATIC off_t zf_find_end_of_central_directory(ZipFile_t *zf); +STATIC ZipFile_t *zipfile_open_file_or_filename(const char *filename, mp_obj_t file_or_filename); + +STATIC uint zipfile_find_archive(ZipFile_t *zf, const char *archive_name, ZIP_FILE_HDR *file_hdr, off_t *out_posn); +STATIC mp_obj_t zipfile_open_archive(ZipFile_t *zf, const char *archive_name, ZIP_FILE_HDR *file_hdr, off_t posn); + +STATIC void zipfile_close(ZipFile_t *zf); + +#if MICROPY_PY_ZIPIMPORT +// Special helper functions used for implementing zipfile import. If zip_path +// contains .zip then everything up until the .zip is considered to be the name +// of the zipfile, and everything after is considered to the archive name. 
+ +// Takes a path which combines the .zip filename and archive name, and returns +// a zipfile handle if the zipfile was opened successfully. +STATIC ZipFile_t *zipfile_import_open(const char *zip_archive_path, const char **archive_name) { + *archive_name = NULL; + char *s = strstr(zip_archive_path, ".zip/"); + if (s == NULL) { + // No .zip in path, so we assume that there is no zipfile. + return NULL; + } + size_t len = s - zip_archive_path + 4; // +4 for .zip + char *zip_path = m_new(char, len + 1); + memcpy(zip_path, zip_archive_path, len); + zip_path[len] = '\0'; + + ZipFile_t *zf = zipfile_open_file_or_filename(zip_path, mp_const_none); + m_del(char, zip_path, len + 1); + zf->filename = NULL; // zf->filename aliased zip_path, which has just been freed + + if (zf->end_directory_posn < 0) { + // No end central directory - this isn't a zipfile. + zipfile_close(zf); + zf = NULL; + return NULL; + } + *archive_name = &s[5]; + return zf; +} + +// Takes a path which combines the .zip filename and archive name, and returns +// an indicator of whether the archive exists, and whether it's a directory or +// a file. +STATIC mp_import_stat_t zipfile_import_core(const char *zip_archive_path, mp_obj_t *zef) { + if (zef) { + *zef = mp_const_none; + } + mp_import_stat_t imp_stat = MP_IMPORT_STAT_NO_EXIST; + nlr_buf_t nlr; + if (nlr_push(&nlr) == 0) { + const char *archive_name; + ZipFile_t *zf = zipfile_import_open(zip_archive_path, &archive_name); + if (zf) { + ZIP_FILE_HDR file_hdr; + off_t posn; + imp_stat = zipfile_find_archive(zf, archive_name, &file_hdr, &posn); + if (zef && imp_stat == MP_IMPORT_STAT_FILE) { + *zef = zipfile_open_archive(zf, archive_name, &file_hdr, posn); + } else { + zipfile_close(zf); + } + } + zf = NULL; + archive_name = NULL; + nlr_pop(); + } + return imp_stat; +} + +// Takes a path which combines the .zip filename and archive name, and returns +// whether the archive doesn't exist, is a directory, or is a file. 
+mp_import_stat_t zipfile_import_stat(const char *zip_archive_path) { + return zipfile_import_core(zip_archive_path, NULL); +} + +// Takes a path which combines the .zip filename and archive name, and returns +// a file object if the archive was opened successfully. +mp_obj_t zipfile_import_open_archive(const char *zip_archive_path) { + mp_obj_t zef; + zipfile_import_core(zip_archive_path, &zef); + return zef; +} + +#endif // MICROPY_PY_ZIPIMPORT + +STATIC void zipfile_close(ZipFile_t *zf) { + if (zf->file != NULL) { mp_close(zf->file); } // NULL once closed, so a repeated close() is harmless + zf->file = NULL; + zf->filename = NULL; + zf->end_directory_posn = -1; + zf->directory_posn = -1; + zf->first_file_posn = -1; + // zf may be a live ZipFile object (see ZipFile_obj_close), so it is left for the GC to reclaim. +} + +STATIC ZipFile_t *zipfile_open_file_or_filename(const char *filename, mp_obj_t file_or_filename) { + ZipFile_t *zf = m_new0(ZipFile_t, 1); + zf->base.type = &ZipFile_type; + + if (filename) { + zf->filename = filename; + zf->file = mp_open(zf->filename, "rb"); + } else if (MP_OBJ_IS_STR(file_or_filename)) { + zf->filename = mp_obj_str_get_str(file_or_filename); + zf->file = mp_open(zf->filename, "rb"); + } else { + zf->filename = "file obj"; + zf->file = mp_file_from_file_obj(file_or_filename); + } + + zf_find_end_of_central_directory(zf); + zf->directory_posn = -1; + zf->first_file_posn = -1; + return zf; +} + +STATIC off_t zf_find_end_of_central_directory(ZipFile_t *zf) { + // The end-of-central-directory is located at the end of the zip file, + // but may be followed by a variable length comment (max len 64K). 
+ + zf->end_directory_posn = -1; + + off_t filesize = mp_seek(zf->file, 0, MP_SEEK_END); + if (filesize < (off_t)sizeof(ZIP_END_OF_CENTRAL_DIRECTORY)) { + return -1; + } + off_t posn = mp_seek(zf->file, -(off_t)sizeof(ZIP_END_OF_CENTRAL_DIRECTORY), MP_SEEK_END); // cast before negating: sizeof is unsigned + byte signature[4]; + if (mp_readinto(zf->file, &signature, sizeof(signature)) == sizeof(signature) + && memcmp(signature, ZIP_END_OF_CENTRAL_DIRECTORY_SIGNATURE, sizeof(signature)) == 0) { + zf->end_directory_posn = posn; + return zf->end_directory_posn; + } + + // TODO: In the general case, the End of Central Directory may be followed + // by a variable length comment. So we should search backwards in the file + // for up to 64K to try and find the End of Central Directory. + // + // For now, we don't support zipfiles with a comment. + + return -1; +} + +STATIC off_t zf_find_central_directory(ZipFile_t *zf) { + zf->directory_posn = -1; + if (zf->end_directory_posn < 0) { + return -1; + } + + mp_seek(zf->file, zf->end_directory_posn, MP_SEEK_SET); + ZIP_END_OF_CENTRAL_DIRECTORY endDir; + mp_readinto(zf->file, &endDir, sizeof(endDir)); + + uint32_t cd_size = ZF_UINT32(endDir.central_directory_size); + if (cd_size == 0) { + // Empty ZIP file - no central directory + return -1; + } + zf->directory_posn = ZF_UINT32(endDir.central_directory_offset); + return zf->directory_posn; +} + +STATIC off_t zf_find_first_file(ZipFile_t *zf) { + if (zf_find_central_directory(zf) < 0) { + return -1; + } + + mp_seek(zf->file, zf->directory_posn, MP_SEEK_SET); + ZIP_CENTRAL_DIRECTORY_HDR centralDir; + if (mp_readinto(zf->file, &centralDir, sizeof(centralDir)) != sizeof(centralDir)) { return -1; } // short read: header is unusable + + zf->first_file_posn = ZF_UINT32(centralDir.file_header_offset); + return zf->first_file_posn; +} + +STATIC off_t zf_get_file_hdr(ZipFile_t *zf, off_t posn, ZIP_FILE_HDR *file_hdr) { + mp_seek(zf->file, posn, MP_SEEK_SET); + if (mp_readinto(zf->file, file_hdr, sizeof(*file_hdr)) != sizeof(*file_hdr) + || memcmp(&file_hdr->signature, ZIP_FILE_HEADER_SIGNATURE, 
sizeof(file_hdr->signature)) != 0) { + return -1; + } + + // Calculate the position of the next file header and return it + return posn + sizeof(*file_hdr) + + ZF_UINT16(file_hdr->filename_len) + + ZF_UINT16(file_hdr->extra_len) + + ZF_UINT32(file_hdr->compressed_size); +} + +STATIC uint zipfile_find_archive(ZipFile_t *zf, const char *archive_name, ZIP_FILE_HDR *file_hdr, off_t *out_posn) { + mp_import_stat_t ret = MP_IMPORT_STAT_NO_EXIST; + + off_t posn = zf_find_central_directory(zf); + if (posn >= 0) { + posn = zf_find_first_file(zf); + } + // Directories stored in the archive end in / + size_t fn_len = strlen(archive_name); + char *fn_buf = m_new(char, fn_len + 2); // +1 for slash, +1 for null termination + while (posn >= 0) { + off_t next_posn = zf_get_file_hdr(zf, posn, file_hdr); + if (next_posn >= 0) { + uint16_t len = ZF_UINT16(file_hdr->filename_len); + if (len <= fn_len) { + mp_readinto(zf->file, fn_buf, len); + fn_buf[len] = '\0'; + } else { + mp_readinto(zf->file, fn_buf, fn_len + 1); + fn_buf[fn_len + 1] = '\0'; + } + + // See if archive_name matches the directory portion + if (strncmp(fn_buf, archive_name, fn_len) == 0 && fn_buf[fn_len] == '/') { + ret = MP_IMPORT_STAT_DIR; + break; + } + if (strcmp(fn_buf, archive_name) == 0) { + ret = MP_IMPORT_STAT_FILE; + break; + } + } + posn = next_posn; + } + m_del(char, fn_buf, fn_len + 2); // must match the size passed to m_new above + *out_posn = posn; + return ret; +} + +STATIC mp_obj_t zipfile_open_archive(ZipFile_t *zf, const char *archive_name, ZIP_FILE_HDR *file_hdr, off_t posn) { + ZipExtFile_t *zef = m_new0(ZipExtFile_t, 1); + zef->base.type = &ZipExtFile_type; + zef->zf = zf; + zef->filename = mp_obj_new_str(archive_name, strlen(archive_name), false); + zef->seek_offset = 0; + zef->data_posn = posn + sizeof(*file_hdr) + + ZF_UINT16(file_hdr->filename_len) + + ZF_UINT16(file_hdr->extra_len); + zef->compressed_size = ZF_UINT32(file_hdr->compressed_size); + zef->uncompressed_size = ZF_UINT32(file_hdr->uncompressed_size); + + uint16_t 
compression_method = ZF_UINT16(file_hdr->compression_method); + if (compression_method == ZIP_STORED) { + // We'll do file I/O as read is called. +#if MICROPY_PY_UZLIB + } else if (compression_method == ZIP_DEFLATED) { + // Since tinf only supports decompressing the whole thing, we read + // the entire compressed data into memory, and decompress it in one go. + zef->uncompressed_data = m_new(uint8_t, zef->uncompressed_size); + void *compressed = m_new(uint8_t, zef->compressed_size); + mp_seek(zef->zf->file, zef->data_posn, MP_SEEK_SET); + mp_int_t bytes_read = mp_readinto(zf->file, compressed, zef->compressed_size); + if (bytes_read != zef->compressed_size) { + nlr_raise(mp_obj_new_exception_arg1(&mp_type_OSError, MP_OBJ_NEW_SMALL_INT(EIO))); + } + unsigned int destlen = zef->uncompressed_size; + int res = tinf_uncompress(zef->uncompressed_data, &destlen, compressed, zef->compressed_size); + if (res != TINF_OK) { + mp_int_t errcode = EILSEQ; // Bad Data - we'll call it illegal byte sequence + if (res == TINF_DEST_OVERFLOW) { + errcode = EFBIG; // The destination buffer should have been big enough. + } + nlr_raise(mp_obj_new_exception_arg1(&mp_type_OSError, MP_OBJ_NEW_SMALL_INT(errcode))); + } + // It should always come out to be the right size, but just in case + // we'll only index into the data that was actually uncompressed. + zef->uncompressed_size = destlen; + m_del(uint8_t, compressed, zef->compressed_size); +#endif + } else { + m_del(ZipExtFile_t, zef, 1); + zef = NULL; + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "Unsupported compression: %d", compression_method)); + } + + return MP_OBJ_FROM_PTR(zef); +} + +// ZipExtFile is used as a file-like object for importing from zip files. 
+ +STATIC void ZipExtFile_print(const mp_print_t *print, mp_obj_t self, mp_print_kind_t kind) { + (void)kind; + ZipExtFile_t *zef = MP_OBJ_TO_PTR(self); + mp_printf(print, "", mp_obj_str_get_str(zef->filename)); +} + +STATIC mp_obj_t ZipExtFile_obj_close(mp_obj_t self) { + ZipExtFile_t *zef = MP_OBJ_TO_PTR(self); + if (zef->uncompressed_data) { + zef->filename = mp_const_none; + zef->seek_offset = 0; + zef->data_posn = 0; + zef->compressed_size = 0; + m_del(uint8_t, zef->uncompressed_data, zef->uncompressed_size); + zef->uncompressed_size = 0; + zef->uncompressed_data = NULL; + } + return mp_const_none; +} +STATIC MP_DEFINE_CONST_FUN_OBJ_1(ZipExtFile_obj_close_obj, ZipExtFile_obj_close); + +STATIC mp_obj_t ZipExtFile_obj___exit__(size_t n_args, const mp_obj_t *args) { + (void)n_args; + return ZipExtFile_obj_close(args[0]); +} +STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(ZipExtFile_obj___exit___obj, 4, 4, ZipExtFile_obj___exit__); + +STATIC const mp_rom_map_elem_t ZipExtFile_locals_dict_table[] = { + { MP_ROM_QSTR(MP_QSTR_read), MP_ROM_PTR(&mp_stream_read_obj) }, + { MP_ROM_QSTR(MP_QSTR_readall), MP_ROM_PTR(&mp_stream_readall_obj) }, + { MP_ROM_QSTR(MP_QSTR_readinto), MP_ROM_PTR(&mp_stream_readinto_obj) }, + { MP_ROM_QSTR(MP_QSTR_readline), MP_ROM_PTR(&mp_stream_unbuffered_readline_obj) }, + { MP_ROM_QSTR(MP_QSTR_readlines), MP_ROM_PTR(&mp_stream_unbuffered_readlines_obj) }, + { MP_ROM_QSTR(MP_QSTR_close), MP_ROM_PTR(&ZipExtFile_obj_close_obj) }, + { MP_ROM_QSTR(MP_QSTR___del__), MP_ROM_PTR(&ZipExtFile_obj_close_obj) }, + { MP_ROM_QSTR(MP_QSTR___enter__), MP_ROM_PTR(&mp_identity_obj) }, + { MP_ROM_QSTR(MP_QSTR___exit__), MP_ROM_PTR(&ZipExtFile_obj___exit___obj) }, +}; + +STATIC MP_DEFINE_CONST_DICT(ZipExtFile_locals_dict, ZipExtFile_locals_dict_table); + +STATIC mp_uint_t ZipExtFile_read(mp_obj_t self, void *buf, mp_uint_t size, int *errcode) { + ZipExtFile_t *zef = MP_OBJ_TO_PTR(self); + if (zef->seek_offset + size > zef->uncompressed_size) { + size = 
zef->uncompressed_size - zef->seek_offset; + } + if (size == 0) { + return 0; + } + mp_int_t bytes_read; + if (zef->uncompressed_data) { + memcpy(buf, &zef->uncompressed_data[zef->seek_offset], size); + bytes_read = size; + } else { + mp_seek(zef->zf->file, zef->data_posn + zef->seek_offset, MP_SEEK_SET); + bytes_read = mp_readinto(zef->zf->file, buf, size); + if (bytes_read < 0) { + *errcode = EIO; + return MP_STREAM_ERROR; + } + } + zef->seek_offset += bytes_read; + return bytes_read; +} + +STATIC const mp_stream_p_t ZipExtFile_stream_p = { + .read = ZipExtFile_read, +}; + +STATIC const mp_obj_type_t ZipExtFile_type = { + { &mp_type_type }, + .name = MP_QSTR_ZipExtFile, + .print = ZipExtFile_print, + .getiter = mp_identity, + .iternext = mp_stream_unbuffered_iter, + .stream_p = &ZipExtFile_stream_p, + .locals_dict = (mp_obj_dict_t *)&ZipExtFile_locals_dict, +}; + +#endif // MICROPY_PY_UZIPFILE || MICROPY_PY_ZIPIMPORT + +// ============================================================================= +// +// Everything from this point onwards in the file is only needed to implement +// the ZipFile classes. 
+// +// ============================================================================= + +#if MICROPY_PY_UZIPFILE + +STATIC void ZipFile_print(const mp_print_t *print, mp_obj_t self, mp_print_kind_t kind) { + (void)kind; + ZipFile_t *zf = MP_OBJ_TO_PTR(self); + mp_printf(print, "", zf->filename); +} + +STATIC mp_obj_t ZipFile_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) { + (void)type; + static const mp_arg_t allowed_args[] = { + { MP_QSTR_file, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_PTR(&mp_const_none_obj)}}, + { MP_QSTR_mode, MP_ARG_OBJ, {.u_rom_obj = MP_ROM_QSTR(MP_QSTR_r)}}, + }; + enum { ARG_file, ARG_mode }; + mp_arg_val_t vals[MP_ARRAY_SIZE(allowed_args)]; + + mp_map_t kw_args; + mp_map_init_fixed_table(&kw_args, n_kw, args + n_args); + + mp_arg_parse_all(n_args, args, &kw_args, + MP_ARRAY_SIZE(allowed_args), allowed_args, vals); + + if (strcmp(mp_obj_str_get_str(vals[ARG_mode].u_obj), "r") != 0) { + nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Only 'r' mode is supported")); + } + + ZipFile_t *zf = zipfile_open_file_or_filename(NULL, vals[ARG_file].u_obj); + if (zf->end_directory_posn < 0) { + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, + "%s is not a zip file", + zf->filename)); + } + return MP_OBJ_FROM_PTR(zf); +} + +STATIC mp_obj_t ZipFile_namelist(mp_obj_t self) { + ZipFile_t *zf = MP_OBJ_TO_PTR(self); + + mp_obj_t namelist = mp_obj_new_list(0, NULL); + off_t posn = zf_find_central_directory(zf); + if (posn >= 0) { + posn = zf_find_first_file(zf); + } + while (posn >= 0) { + ZIP_FILE_HDR file_hdr; + off_t next_posn = zf_get_file_hdr(zf, posn, &file_hdr); + if (next_posn < 0) { + break; + } + + uint16_t len = ZF_UINT16(file_hdr.filename_len); + vstr_t filename; + vstr_init_len(&filename, len); + mp_readinto(zf->file, vstr_str(&filename), len); + mp_obj_list_append(namelist, mp_obj_new_str_from_vstr(&mp_type_str, &filename)); + + posn = next_posn; + } + return namelist; +} 
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(ZipFile_namelist_obj, ZipFile_namelist); + +STATIC mp_obj_t ZipFile_open(mp_obj_t self, mp_obj_t archive_name_in) { + ZipFile_t *zf = MP_OBJ_TO_PTR(self); + const char *archive_name = mp_obj_str_get_str(archive_name_in); + + ZIP_FILE_HDR file_hdr; + off_t posn; + uint stat = zipfile_find_archive(zf, archive_name, &file_hdr, &posn); + if (stat != MP_IMPORT_STAT_FILE) { + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_KeyError, "'%s' not in archive", archive_name)); + } + return zipfile_open_archive(zf, archive_name, &file_hdr, posn); +} +STATIC MP_DEFINE_CONST_FUN_OBJ_2(ZipFile_open_obj, ZipFile_open); + +STATIC mp_obj_t ZipFile_obj_close(mp_obj_t self) { + ZipFile_t *zf = MP_OBJ_TO_PTR(self); + zipfile_close(zf); + return mp_const_none; +} +STATIC MP_DEFINE_CONST_FUN_OBJ_1(ZipFile_obj_close_obj, ZipFile_obj_close); + +STATIC mp_obj_t ZipFile_obj___exit__(size_t n_args, const mp_obj_t *args) { + (void)n_args; + return ZipFile_obj_close(args[0]); +} +STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(ZipFile_obj___exit___obj, 4, 4, ZipFile_obj___exit__); + +STATIC const mp_rom_map_elem_t ZipFile_locals_dict_table[] = { + { MP_ROM_QSTR(MP_QSTR_namelist), MP_ROM_PTR(&ZipFile_namelist_obj) }, + { MP_ROM_QSTR(MP_QSTR_open), MP_ROM_PTR(&ZipFile_open_obj) }, + { MP_ROM_QSTR(MP_QSTR_close), MP_ROM_PTR(&ZipFile_obj_close_obj) }, + { MP_ROM_QSTR(MP_QSTR___del__), MP_ROM_PTR(&ZipFile_obj_close_obj) }, + { MP_ROM_QSTR(MP_QSTR___enter__), MP_ROM_PTR(&mp_identity_obj) }, + { MP_ROM_QSTR(MP_QSTR___exit__), MP_ROM_PTR(&ZipFile_obj___exit___obj) }, +}; + +STATIC MP_DEFINE_CONST_DICT(ZipFile_locals_dict, ZipFile_locals_dict_table); + +STATIC const mp_obj_type_t ZipFile_type = { + { &mp_type_type }, + .name = MP_QSTR_ZipFile, + .print = ZipFile_print, + .make_new = ZipFile_make_new, + .locals_dict = (mp_obj_dict_t *)&ZipFile_locals_dict, +}; + +STATIC mp_obj_t zf_is_zipfile(mp_obj_t file_or_filename) { + nlr_buf_t nlr; + if (nlr_push(&nlr) == 0) { + 
ZipFile_t *zf = zipfile_open_file_or_filename(NULL, file_or_filename); + bool is_zipfile = zf->end_directory_posn >= 0; + zipfile_close(zf); + nlr_pop(); + if (is_zipfile) { + return mp_const_true; + } + } + return mp_const_false; +} +STATIC MP_DEFINE_CONST_FUN_OBJ_1(zf_is_zipfile_obj, zf_is_zipfile); + +STATIC const mp_rom_map_elem_t mp_module_zipfile_globals_table[] = { + { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_uzipfile) }, + { MP_ROM_QSTR(MP_QSTR_ZIP_STORED), MP_ROM_INT(ZIP_STORED) }, + { MP_ROM_QSTR(MP_QSTR_ZIP_DEFLATED), MP_ROM_INT(ZIP_DEFLATED) }, + { MP_ROM_QSTR(MP_QSTR_is_zipfile), MP_ROM_PTR(&zf_is_zipfile_obj) }, + { MP_ROM_QSTR(MP_QSTR_ZipFile), MP_ROM_PTR(&ZipFile_type) }, +}; + +STATIC MP_DEFINE_CONST_DICT(mp_module_zipfile_globals, mp_module_zipfile_globals_table); + +const mp_obj_module_t mp_module_uzipfile = { + .base = { &mp_type_module }, + .name = MP_QSTR_uzipfile, + .globals = (mp_obj_dict_t*)&mp_module_zipfile_globals, +}; + +#endif // MICROPY_PY_UZIPFILE diff --git a/extmod/moduzipfile.h b/extmod/moduzipfile.h new file mode 100644 index 0000000000000..ca95b56094961 --- /dev/null +++ b/extmod/moduzipfile.h @@ -0,0 +1,40 @@ +/* + * This file is part of the Micro Python project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2016 Dave Hylands + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef __MICROPY_INCLUDED_EXTMOD_MODUZIPFILE_H__ +#define __MICROPY_INCLUDED_EXTMOD_MODUZIPFILE_H__ + +#include +#include "py/lexer.h" + +// C API for importing archives from zipfiles. + +mp_import_stat_t zipfile_import_stat(const char *zip_archive_path); +mp_obj_t zipfile_import_open_archive(const char *zip_archive_path); + +#endif // __MICROPY_INCLUDED_EXTMOD_MODUZIPFILE_H__ + + diff --git a/py/builtin.h b/py/builtin.h index 162835cd4f20e..e96a4e099fd95 100644 --- a/py/builtin.h +++ b/py/builtin.h @@ -95,6 +95,7 @@ extern const mp_obj_dict_t mp_module_builtins_globals; // extmod modules extern const mp_obj_module_t mp_module_uctypes; +extern const mp_obj_module_t mp_module_uzipfile; extern const mp_obj_module_t mp_module_uzlib; extern const mp_obj_module_t mp_module_ujson; extern const mp_obj_module_t mp_module_ure; diff --git a/py/mpconfig.h b/py/mpconfig.h index ce049291b0d50..a810d02c5fa0a 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -788,6 +788,10 @@ typedef double mp_float_t; #define MICROPY_PY_UBINASCII (0) #endif +#ifndef MICROPY_PY_UZIPFILE +#define MICROPY_PY_UZIPFILE (0) +#endif + #ifndef MICROPY_PY_URANDOM #define MICROPY_PY_URANDOM (0) #endif diff --git a/py/objmodule.c b/py/objmodule.c index 1034d00f6036c..6a6aac3ac52f6 100644 --- a/py/objmodule.c +++ b/py/objmodule.c @@ -166,6 +166,9 @@ STATIC const mp_rom_map_elem_t mp_builtin_module_table[] = { #if MICROPY_PY_UCTYPES { MP_ROM_QSTR(MP_QSTR_uctypes), MP_ROM_PTR(&mp_module_uctypes) }, #endif +#if 
MICROPY_PY_UZIPFILE + { MP_ROM_QSTR(MP_QSTR_uzipfile), MP_ROM_PTR(&mp_module_uzipfile) }, +#endif #if MICROPY_PY_UZLIB { MP_ROM_QSTR(MP_QSTR_uzlib), MP_ROM_PTR(&mp_module_uzlib) }, #endif diff --git a/py/py.mk b/py/py.mk index fcde3f7ef5f55..a98e523c2ab9e 100644 --- a/py/py.mk +++ b/py/py.mk @@ -163,6 +163,7 @@ PY_O_BASENAME = \ ../extmod/moductypes.o \ ../extmod/modujson.o \ ../extmod/modure.o \ + ../extmod/moduzipfile.o \ ../extmod/moduzlib.o \ ../extmod/moduheapq.o \ ../extmod/moduhashlib.o \ diff --git a/py/qstrdefs.h b/py/qstrdefs.h index 5a5bc6fb33dd6..b28fa7845b8b7 100644 --- a/py/qstrdefs.h +++ b/py/qstrdefs.h @@ -628,6 +628,17 @@ Q(a2b_base64) Q(b2a_base64) #endif +#if MICROPY_PY_UZIPFILE +Q(uzipfile) +Q(ZIP_STORED) +Q(ZIP_DEFLATED) +Q(is_zipfile) +Q(rb) +Q(ZipFile) +Q(namelist) +Q(ZipExtFile) +#endif + #if MICROPY_PY_MACHINE Q(umachine) Q(mem) diff --git a/tests/extmod/zipfile1.py b/tests/extmod/zipfile1.py new file mode 100644 index 0000000000000..620a451f9fae8 --- /dev/null +++ b/tests/extmod/zipfile1.py @@ -0,0 +1,121 @@ +try: + import uzipfile as zipfile + import ubinascii as binascii +except ImportError: + import zipfile as zipfile + import binascii + +EINVAL = 22 + +files = { + 'empty.zip': b'504b0506000000000000000000000000000000000000', + 'stored.zip': b'504b03040a000000000090933748fbbdefe6150000001500000012001c007a697066696c655f73746f7265642e7478745554090003cf36a456cf36a45675780b000104d588000004e90300007072696e74282748656c6c6f20576f726c6427290a504b01021e030a000000000090933748fbbdefe61500000015000000120018000000000001000000b481000000007a697066696c655f73746f7265642e7478745554050003cf36a45675780b000104d588000004e9030000504b0506000000000100010058000000610000000000', + 'deflated.zip': 
b'504b0304140000000800f293374836763c6d270000002c00000014001c007a697066696c655f6465666c617465642e74787455540900038837a4560238a45675780b000104d588000004e90300000bc9c82c5600a2448592d4e21285b4cc9c54ae10a8584946aa42716a727e5e8a424e665e2a1700504b01021e03140000000800f293374836763c6d270000002c000000140018000000000001000000b481000000007a697066696c655f6465666c617465642e74787455540500038837a45675780b000104d588000004e9030000504b050600000000010001005a000000750000000000', + 'deflated.txt': b'54686973206973206120746573742066696c650a5468697320697320746865207365636f6e64206c696e650a', + 'stored.txt': b'7072696e74282748656c6c6f20576f726c6427290a', +} + +for filename, data in sorted(files.items()): + # with doesn't work in conjunction with --emit native (part of the test run) + f = open(filename, 'wb') + f.write(binascii.unhexlify(data)) + f.close() + +for filename in sorted(files): + print(filename, 'is_zipfile:', zipfile.is_zipfile(filename)) + if zipfile.is_zipfile(filename): + zf = zipfile.ZipFile(filename) + zf_names = zf.namelist() + print(filename, 'namelist:', zf_names) + zf.close() + +def check_file(zip_filename, data_filename, data): + if type(zip_filename) is str: + print('check_file zip filename:', zip_filename, 'data file', data_filename) + else: + print('check_file zip file', zip_filename.name, 'data file', data_filename) + zf = zipfile.ZipFile(zip_filename) + zef = zf.open(data_filename) + buf = zef.read(len(data)//2 + 10) + cmp = binascii.unhexlify(data) + print('file compare:', buf == cmp) + zef.close() + zf.close() + +check_file('stored.zip', 'zipfile_stored.txt', files['stored.txt']) +check_file('deflated.zip', 'zipfile_deflated.txt', files['deflated.txt']) + + +class ByteFile: + """Class which treats a bytestring as a file-like object.""" + + def __init__(self, bytes, name=None): + self.bytes = bytes + self.posn = 0 + if name is None: + name = 'ByteFile object' + self.name = name + + def close(self): + pass + + def read(self, num_bytes=-1): + """CPython's zipfile
implementation needs read to be implemented.""" + #print('read', num_bytes) + if num_bytes < 0: + return self.readall() + size = num_bytes + if self.posn + size > len(self.bytes): + size = len(self.bytes) - self.posn + if size <= 0: + return b'' + buf = bytearray(self.bytes[self.posn:self.posn+size]) + #print('read @', self.posn, binascii.hexlify(buf)) + self.posn += len(buf) + return buf + + def readall(self): + buf = self.bytes[self.posn:] + #print('readall @', self.posn, binascii.hexlify(buf)) + self.posn = len(self.bytes) + return buf + + def readinto(self, buf): + size = len(buf) + if self.posn + size > len(self.bytes): + size = len(self.bytes) - self.posn + if size <= 0: + return 0 + buf[0:size] = self.bytes[self.posn:self.posn+size] + #print('readinto @', self.posn, binascii.hexlify(buf[0:size])) + self.posn += size + return size + + def seek(self, offset, whence): + if whence == 0: + self.posn = offset + elif whence == 1: + self.posn += offset + elif whence == 2: + self.posn = len(self.bytes) + offset + #print('seek', offset, whence, 'got', self.posn) + if self.posn < 0 or self.posn > len(self.bytes): + raise OSError(EINVAL) + return self.posn + + def tell(self): + return self.posn + +print('===== Checking ByteFile =====') + +for filename, data in sorted(files.items()): + bf = ByteFile(binascii.unhexlify(data), name=filename) + print(filename, 'is_zipfile:', zipfile.is_zipfile(bf)) + if zipfile.is_zipfile(bf): + zf = zipfile.ZipFile(bf) + zf_names = zf.namelist() + print(filename, 'namelist:', zf_names) + zf.close() + +check_file(ByteFile(binascii.unhexlify(files['stored.zip'])), 'zipfile_stored.txt', files['stored.txt']) +check_file(ByteFile(binascii.unhexlify(files['deflated.zip'])), 'zipfile_deflated.txt', files['deflated.txt']) diff --git a/unix/mpconfigport.h b/unix/mpconfigport.h index f7fdeec07c949..5ef551ac84e6d 100644 --- a/unix/mpconfigport.h +++ b/unix/mpconfigport.h @@ -108,6 +108,8 @@ #ifndef MICROPY_PY_USELECT #define MICROPY_PY_USELECT (1) 
#endif +#define MICROPY_PY_UZIPFILE (1) +#define MICROPY_PY_FILE_LIKE (1) #define MICROPY_PY_MACHINE (1) #define MICROPY_MACHINE_MEM_GET_READ_ADDR mod_machine_mem_get_addr #define MICROPY_MACHINE_MEM_GET_WRITE_ADDR mod_machine_mem_get_addr From 793324bdbf41926ac8ab7c772b2833d29b83d27f Mon Sep 17 00:00:00 2001 From: Dave Hylands Date: Fri, 29 Jan 2016 23:42:45 -0800 Subject: [PATCH 3/3] py: Add support for importing from a zipfile --- py/builtinimport.c | 58 +++++++++++++++++++++++++--- py/emitglue.c | 53 +++++++++++++++++++++++++- py/emitglue.h | 1 + py/lexer.h | 5 +++ py/lexermpfile.c | 82 ++++++++++++++++++++++++++++++++++++++++ py/mpconfig.h | 9 +++++ py/py.mk | 1 + py/qstrdefs.h | 4 ++ stmhal/mpconfigport.h | 4 ++ tests/extmod/zipimp.py | 7 ++++ tests/extmod/zipimp.zip | Bin 0 -> 689 bytes tests/run-tests | 2 + unix/mpconfigport.h | 2 + 13 files changed, 221 insertions(+), 7 deletions(-) create mode 100644 py/lexermpfile.c create mode 100644 tests/extmod/zipimp.py create mode 100644 tests/extmod/zipimp.zip diff --git a/py/builtinimport.c b/py/builtinimport.c index ec79357cb7b27..34e24c4febf46 100644 --- a/py/builtinimport.c +++ b/py/builtinimport.c @@ -36,6 +36,8 @@ #include "py/builtin.h" #include "py/frozenmod.h" +#include "extmod/moduzipfile.h" + #if 0 // print debugging info #define DEBUG_PRINT (1) #define DEBUG_printf DEBUG_printf @@ -60,22 +62,42 @@ bool mp_obj_is_package(mp_obj_t module) { return dest[0] != MP_OBJ_NULL; } +#if DEBUG_PRINT +STATIC const char *stat_str[] = { "doesn't exist", "dir", "file" }; +#endif + +STATIC mp_import_stat_t import_stat(const char *path) { + mp_import_stat_t stat; + +#if MICROPY_PY_ZIPIMPORT + stat = zipfile_import_stat(path); + if (stat != MP_IMPORT_STAT_NO_EXIST) { + DEBUG_printf("zipimport import_stat('%s') returned %s\n", path, stat_str[stat]); + return stat; + } +#endif + + stat = mp_import_stat(path); + DEBUG_printf("import_stat('%s') returned %s\n", path, stat_str[stat]); + return stat; +} + STATIC mp_import_stat_t
stat_dir_or_file(vstr_t *path) { - mp_import_stat_t stat = mp_import_stat(vstr_null_terminated_str(path)); + mp_import_stat_t stat = import_stat(vstr_null_terminated_str(path)); DEBUG_printf("stat %s: %d\n", vstr_str(path), stat); if (stat == MP_IMPORT_STAT_DIR) { return stat; } vstr_add_str(path, ".py"); - stat = mp_import_stat(vstr_null_terminated_str(path)); + stat = import_stat(vstr_null_terminated_str(path)); if (stat == MP_IMPORT_STAT_FILE) { return stat; } #if MICROPY_PERSISTENT_CODE_LOAD vstr_ins_byte(path, path->len - 2, 'm'); - stat = mp_import_stat(vstr_null_terminated_str(path)); + stat = import_stat(vstr_null_terminated_str(path)); if (stat == MP_IMPORT_STAT_FILE) { return stat; } @@ -85,6 +107,7 @@ STATIC mp_import_stat_t stat_dir_or_file(vstr_t *path) { } STATIC mp_import_stat_t find_file(const char *file_str, uint file_len, vstr_t *dest) { + DEBUG_printf("find_file(%s)\n", file_str); #if MICROPY_PY_SYS // extract the list of paths mp_uint_t path_num; @@ -103,6 +126,9 @@ STATIC mp_import_stat_t find_file(const char *file_str, uint file_len, vstr_t *d vstr_reset(dest); mp_uint_t p_len; const char *p = mp_obj_str_get_data(path_items[i], &p_len); + + DEBUG_printf("find_file: Checking sys.path entry '%s' file_str = '%s'\n", p, file_str); + if (p_len > 0) { vstr_add_strn(dest, p, p_len); vstr_add_char(dest, PATH_SEP_CHAR); @@ -187,7 +213,17 @@ STATIC void do_load(mp_obj_t module_obj, vstr_t *file) { #if MICROPY_PERSISTENT_CODE_LOAD if (file_str[file->len - 3] == 'm') { - mp_raw_code_t *raw_code = mp_raw_code_load_file(file_str); + mp_raw_code_t *raw_code; + + #if MICROPY_PY_ZIPIMPORT + mp_obj_t zip_file = zipfile_import_open_archive(file_str); + if (zip_file != mp_const_none && MP_OBJ_IS_OBJ(zip_file)) { + raw_code = mp_raw_code_load_file_obj(zip_file); + } else + #endif + { + raw_code = mp_raw_code_load_file(file_str); + } do_execute_raw_code(module_obj, raw_code); return; } @@ -195,7 +231,17 @@ STATIC void do_load(mp_obj_t module_obj, vstr_t *file) { 
#if MICROPY_ENABLE_COMPILER { - mp_lexer_t *lex = mp_lexer_new_from_file(file_str); + mp_lexer_t *lex; + + #if MICROPY_PY_ZIPIMPORT + mp_obj_t zip_file = zipfile_import_open_archive(file_str); + if (zip_file != mp_const_none && MP_OBJ_IS_OBJ(zip_file)) { + lex = mp_lexer_new_from_file_obj(qstr_from_str(file_str), zip_file); + } else + #endif + { + lex = mp_lexer_new_from_file(file_str); + } do_load_from_lexer(module_obj, lex, file_str); } #else @@ -434,7 +480,7 @@ mp_obj_t mp_builtin___import__(size_t n_args, const mp_obj_t *args) { mp_store_attr(module_obj, MP_QSTR___path__, mp_obj_new_str(vstr_str(&path), vstr_len(&path), false)); vstr_add_char(&path, PATH_SEP_CHAR); vstr_add_str(&path, "__init__.py"); - if (mp_import_stat(vstr_null_terminated_str(&path)) != MP_IMPORT_STAT_FILE) { + if (import_stat(vstr_null_terminated_str(&path)) != MP_IMPORT_STAT_FILE) { vstr_cut_tail_bytes(&path, sizeof("/__init__.py") - 1); // cut off /__init__.py mp_warning("%s is imported as namespace package", vstr_str(&path)); } else { diff --git a/py/emitglue.c b/py/emitglue.c index 4157593ba316b..d5f96dadcfd99 100644 --- a/py/emitglue.c +++ b/py/emitglue.c @@ -34,6 +34,7 @@ #include "py/emitglue.h" #include "py/runtime0.h" #include "py/bc.h" +#include "py/mpfile.h" #if 0 // print debugging info #define DEBUG_PRINT (1) @@ -396,7 +397,57 @@ mp_raw_code_t *mp_raw_code_load_mem(const byte *buf, size_t len) { // here we define mp_raw_code_load_file depending on the port // TODO abstract this away properly -#if defined(__i386__) || defined(__x86_64__) +#if MICROPY_PY_FILE_LIKE +// Universal file reader (mpconfig.h always defines MICROPY_PY_FILE_LIKE, so its value is tested rather than defined()) + +typedef struct _mp_lexer_file_buf_t { + mp_file_t *file; + byte buf[20]; + mp_uint_t len; + mp_uint_t pos; +} mp_lexer_file_buf_t; + +STATIC mp_uint_t mp_file_buf_next_byte(void *fb_in) { + mp_lexer_file_buf_t *fb = fb_in; + if (fb->pos >= fb->len) { + if (fb->len == 0) { + return (mp_uint_t)-1; + } else { + int n = mp_readinto(fb->file, fb->buf, sizeof(fb->buf)); + if (n <= 0) { +
fb->len = 0; + return (mp_uint_t)-1; + } + fb->len = n; + fb->pos = 0; + } + } + return fb->buf[fb->pos++]; +} + +STATIC mp_raw_code_t *mp_raw_code_load_mp_file(mp_file_t *file) { + mp_lexer_file_buf_t fb; + fb.file = file; + int n = mp_readinto(fb.file, fb.buf, sizeof(fb.buf)); + fb.len = n > 0 ? n : 0; // a non-positive first read means EOF, same as the refill path; avoids a huge unsigned len + fb.pos = 0; + mp_reader_t reader; + reader.data = &fb; + reader.read_byte = mp_file_buf_next_byte; + mp_raw_code_t *rc = mp_raw_code_load(&reader); + mp_close(fb.file); + return rc; +} + +mp_raw_code_t *mp_raw_code_load_file(const char *filename) { + return mp_raw_code_load_mp_file(mp_open(filename, "rb")); +} + +mp_raw_code_t *mp_raw_code_load_file_obj(mp_obj_t file_obj) { + return mp_raw_code_load_mp_file(mp_file_from_file_obj(file_obj)); +} + +#elif defined(__i386__) || defined(__x86_64__) // unix file reader #include diff --git a/py/emitglue.h b/py/emitglue.h index b31e8dbb221b2..60368ea8838d2 100644 --- a/py/emitglue.h +++ b/py/emitglue.h @@ -64,6 +64,7 @@ typedef struct _mp_reader_t { mp_raw_code_t *mp_raw_code_load(mp_reader_t *reader); mp_raw_code_t *mp_raw_code_load_mem(const byte *buf, size_t len); mp_raw_code_t *mp_raw_code_load_file(const char *filename); +mp_raw_code_t *mp_raw_code_load_file_obj(mp_obj_t file_obj); #endif #if MICROPY_PERSISTENT_CODE_SAVE diff --git a/py/lexer.h b/py/lexer.h index 36d1e99d23f23..acf9b8e868ba1 100644 --- a/py/lexer.h +++ b/py/lexer.h @@ -30,6 +30,7 @@ #include "py/mpconfig.h" #include "py/qstr.h" +#include "py/obj.h" /* lexer.h -- simple tokeniser for Micro Python * @@ -198,4 +199,8 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename); mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd); #endif +#if MICROPY_HELPER_LEXER_MPFILE +mp_lexer_t *mp_lexer_new_from_file_obj(qstr filename, mp_obj_t file_obj); +#endif + #endif // __MICROPY_INCLUDED_PY_LEXER_H__ diff --git a/py/lexermpfile.c b/py/lexermpfile.c new file mode 100644 index 0000000000000..a5a49350018b6 --- /dev/null +++ b/py/lexermpfile.c @@ -0,0 +1,82
@@ +/* + * This file is part of the Micro Python project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2016 Damien P. George + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "py/mpconfig.h" + +#if MICROPY_HELPER_LEXER_MPFILE + +#include +#include + +#include "py/lexer.h" +#include "py/mpfile.h" + +typedef struct _mp_lexer_file_buf_t { + mp_file_t *file; + byte buf[20]; + mp_uint_t len; + mp_uint_t pos; +} mp_lexer_file_buf_t; + +STATIC mp_uint_t mp_file_buf_next_byte(void *lexer_fb) { + mp_lexer_file_buf_t *fb = lexer_fb; + if (fb->pos >= fb->len) { + if (fb->len == 0) { + return MP_LEXER_EOF; + } + + int n = mp_readinto(fb->file, fb->buf, sizeof(fb->buf)); + if (n <= 0) { + fb->len = 0; + return MP_LEXER_EOF; + } + fb->len = n; + fb->pos = 0; + } + return fb->buf[fb->pos++]; +} + +STATIC void mp_file_buf_close(void *lexer_fb) { + mp_lexer_file_buf_t *fb = lexer_fb; + mp_close(fb->file); + fb->file = NULL; + m_del_obj(mp_lexer_file_buf_t, fb); +} + +mp_lexer_t *mp_lexer_new_from_file_obj(qstr filename, mp_obj_t file_obj) { + mp_lexer_file_buf_t *fb = m_new_obj_maybe(mp_lexer_file_buf_t); + if (fb == NULL) { + return NULL; + } + fb->file = mp_file_from_file_obj(file_obj); + int n = mp_readinto(fb->file, fb->buf, sizeof(fb->buf)); + fb->len = n > 0 ? n : 0; // a non-positive first read means EOF, same as the refill path; avoids a huge unsigned len + fb->pos = 0; + + return mp_lexer_new(filename, fb, mp_file_buf_next_byte, mp_file_buf_close); +} + +#endif // MICROPY_HELPER_LEXER_MPFILE diff --git a/py/mpconfig.h b/py/mpconfig.h index a810d02c5fa0a..9c5b21e79a9f7 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -402,6 +402,11 @@ #define MICROPY_HELPER_LEXER_UNIX (0) #endif +// Whether to include lexer helper function for mpfile (needed for zipimport) +#ifndef MICROPY_HELPER_LEXER_MPFILE +#define MICROPY_HELPER_LEXER_MPFILE (0) +#endif + // Long int implementation #define MICROPY_LONGINT_IMPL_NONE (0) #define MICROPY_LONGINT_IMPL_LONGLONG (1) @@ -792,6 +797,10 @@ typedef double mp_float_t; #define MICROPY_PY_UZIPFILE (0) #endif +#ifndef MICROPY_PY_ZIPIMPORT +#define MICROPY_PY_ZIPIMPORT (0) +#endif + #ifndef MICROPY_PY_URANDOM #define MICROPY_PY_URANDOM (0) #endif diff --git a/py/py.mk b/py/py.mk index
a98e523c2ab9e..c1730070ad25f 100644 --- a/py/py.mk +++ b/py/py.mk @@ -73,6 +73,7 @@ PY_O_BASENAME = \ unicode.o \ mpz.o \ lexer.o \ + lexermpfile.o \ lexerstr.o \ lexerunix.o \ parse.o \ diff --git a/py/qstrdefs.h b/py/qstrdefs.h index b28fa7845b8b7..df2e12fcd6a87 100644 --- a/py/qstrdefs.h +++ b/py/qstrdefs.h @@ -636,7 +636,11 @@ Q(is_zipfile) Q(rb) Q(ZipFile) Q(namelist) +#endif +#if MICROPY_PY_UZIPFILE || MICROPY_PY_ZIPIMPORT Q(ZipExtFile) + + #endif #if MICROPY_PY_MACHINE diff --git a/stmhal/mpconfigport.h b/stmhal/mpconfigport.h index eaf1da716d64d..d0797e74f3a6c 100644 --- a/stmhal/mpconfigport.h +++ b/stmhal/mpconfigport.h @@ -88,6 +88,10 @@ #define MICROPY_PY_UHEAPQ (1) #define MICROPY_PY_UHASHLIB (1) #define MICROPY_PY_MACHINE (1) +#define MICROPY_PY_FILE_LIKE (1) +#define MICROPY_PY_UZIPFILE (0) +#define MICROPY_PY_ZIPIMPORT (1) +#define MICROPY_HELPER_LEXER_MPFILE (1) #define MICROPY_ENABLE_EMERGENCY_EXCEPTION_BUF (1) #define MICROPY_EMERGENCY_EXCEPTION_BUF_SIZE (0) diff --git a/tests/extmod/zipimp.py b/tests/extmod/zipimp.py new file mode 100644 index 0000000000000..1790f935c331b --- /dev/null +++ b/tests/extmod/zipimp.py @@ -0,0 +1,7 @@ +import sys + +sys.path.insert(0, __file__.replace('.py', '.zip')) + +import foo +import foo.tst +import bar diff --git a/tests/extmod/zipimp.zip b/tests/extmod/zipimp.zip new file mode 100644 index 0000000000000000000000000000000000000000..4bd59b2588a0d60aec6909ced734a406a5a0ce53 GIT binary patch literal 689 zcmWIWW@h1H00FMKHXdLGlwe_yVMxo**AEThWMEdgzbfqMyH#PO72FJrELS@i7+791 zGcW|8=)e- zl1_C^E`&{c=Gb^#QCpKB1GEK%xu7_%j0?pXcXa_Y3DFj192pftSu*in4fEo%5W}takf*9G6m;s4w9tY5^uwVt6g(XM> Uyjj_RmNKvap*2u-0?2m^0PQcDtpET3 literal 0 HcmV?d00001 diff --git a/tests/run-tests b/tests/run-tests index 71ff3f7122b85..f97c5a46382e3 100755 --- a/tests/run-tests +++ b/tests/run-tests @@ -224,6 +224,8 @@ def run_tests(pyb, tests, args): # Some tests use unsupported features on Windows if os.name == 'nt': skip_tests.add('import/import_file.py') 
# works but CPython prints forward slashes + skip_tests.add('extmod/zipfile1.py') # not enabled (yet) for windows + skip_tests.add('extmod/zipimp.py') # not enabled (yet) for windows # Some tests are known to fail with native emitter # Remove them from the below when they work diff --git a/unix/mpconfigport.h b/unix/mpconfigport.h index 5ef551ac84e6d..4aadfb37341a8 100644 --- a/unix/mpconfigport.h +++ b/unix/mpconfigport.h @@ -110,6 +110,8 @@ #endif #define MICROPY_PY_UZIPFILE (1) #define MICROPY_PY_FILE_LIKE (1) +#define MICROPY_PY_ZIPIMPORT (1) +#define MICROPY_HELPER_LEXER_MPFILE (1) #define MICROPY_PY_MACHINE (1) #define MICROPY_MACHINE_MEM_GET_READ_ADDR mod_machine_mem_get_addr #define MICROPY_MACHINE_MEM_GET_WRITE_ADDR mod_machine_mem_get_addr