| 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
| 2 | #ifndef _UAPI_LINUX_FS_H |
| 3 | #define _UAPI_LINUX_FS_H |
| 4 | |
| 5 | /* |
| 6 | * This file has definitions for some important file table structures |
| 7 | * and constants and structures used by various generic file system |
| 8 | * ioctl's. Please do not make any changes in this file before |
| 9 | * sending patches for review to linux-fsdevel@vger.kernel.org and |
| 10 | * linux-api@vger.kernel.org. |
| 11 | */ |
| 12 | |
| 13 | #include <linux/limits.h> |
| 14 | #include <linux/ioctl.h> |
| 15 | #include <linux/types.h> |
| 16 | #ifndef __KERNEL__ |
| 17 | #include <linux/fscrypt.h> |
| 18 | #endif |
| 19 | |
| 20 | /* Use of MS_* flags within the kernel is restricted to core mount(2) code. */ |
| 21 | #if !defined(__KERNEL__) |
| 22 | #include <linux/mount.h> |
| 23 | #endif |
| 24 | |
| 25 | /* |
| 26 | * It's silly to have NR_OPEN bigger than NR_FILE, but you can change |
| 27 | * the file limit at runtime and only root can increase the per-process |
| 28 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute |
| 29 | * upper limit on files-per-process. |
| 30 | * |
| 31 | * Some programs (notably those using select()) may have to be |
| 32 | * recompiled to take full advantage of the new limits. |
| 33 | */ |
| 34 | |
| 35 | /* Fixed constants first: */ |
| 36 | #undef NR_OPEN /* NOTE(review): presumably drops an NR_OPEN picked up from an earlier include - confirm */ |
| 37 | #define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */ |
| 38 | #define INR_OPEN_MAX 4096 /* Hard limit for nfile rlimits */ |
| 39 | |
| 40 | #define BLOCK_SIZE_BITS 10 /* log2 of BLOCK_SIZE */ |
| 41 | #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) /* 1 << 10 == 1024 */ |
| 42 | |
| 43 | /* Flags for integrity meta; each flag is a separate bit so they can be OR-ed together */ |
| 44 | #define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */ |
| 45 | #define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */ |
| 46 | #define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */ |
| 47 | |
| 48 | #define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \ |
| 49 | IO_INTEGRITY_CHK_REFTAG | \ |
| 50 | IO_INTEGRITY_CHK_APPTAG) /* the three IO_INTEGRITY_CHK_* bits above, i.e. 0x7 */ |
| 51 | |
| 52 | #define SEEK_SET 0 /* seek relative to beginning of file */ |
| 53 | #define SEEK_CUR 1 /* seek relative to current file position */ |
| 54 | #define SEEK_END 2 /* seek relative to end of file */ |
| 55 | #define SEEK_DATA 3 /* seek to the next data */ |
| 56 | #define SEEK_HOLE 4 /* seek to the next hole */ |
| 57 | #define SEEK_MAX SEEK_HOLE /* highest SEEK_* value defined above (4) */ |
| 58 | /* RENAME_*: one bit per flag (values 1, 2 and 4) */ |
| 59 | #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ |
| 60 | #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ |
| 61 | #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ |
| 62 | |
| 63 | /* |
| 64 | * The root inode of procfs is guaranteed to always have the same inode number. |
| 65 | * For programs that make heavy use of procfs, verifying that the root is a |
| 66 | * real procfs root and using openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH}) |
| 67 | * will allow you to make sure you are never tricked into operating on the |
| 68 | * wrong procfs file. |
| 69 | */ |
| 70 | enum procfs_ino { |
| 71 | PROCFS_ROOT_INO = 1, /* the fixed inode number of the procfs root described above */ |
| 72 | }; |
| 73 | |
| 74 | struct file_clone_range { /* type named in the FICLONERANGE ioctl definition below */ |
| 75 | __s64 src_fd; /* NOTE(review): presumably the fd of the file to clone from - confirm */ |
| 76 | __u64 src_offset; /* NOTE(review): presumably a byte offset in the source file - confirm */ |
| 77 | __u64 src_length; /* NOTE(review): presumably the byte length of the range - confirm */ |
| 78 | __u64 dest_offset; /* NOTE(review): presumably a byte offset in the destination file - confirm */ |
| 79 | }; |
| 80 | |
| 81 | struct fstrim_range { /* type named in the FITRIM ioctl definition below */ |
| 82 | __u64 start; /* NOTE(review): units of start/len/minlen are not stated in this file; presumably bytes - confirm */ |
| 83 | __u64 len; |
| 84 | __u64 minlen; |
| 85 | }; |
| 86 | |
| 87 | /* |
| 88 | * We include a length field because some filesystems (vfat) have an identifier |
| 89 | * that we do want to expose as a UUID, but doesn't have the standard length. |
| 90 | * |
| 91 | * We use a fixed size buffer because this interface will, by fiat, never |
| 92 | * support "UUIDs" longer than 16 bytes; we don't want to force all downstream |
| 93 | * users to have to deal with that. |
| 94 | */ |
| 95 | struct fsuuid2 { /* type named in the FS_IOC_GETFSUUID ioctl definition below */ |
| 96 | __u8 len; /* length field for the identifier, which may not have the standard UUID length (see above) */ |
| 97 | __u8 uuid[16]; /* fixed-size buffer: identifiers longer than 16 bytes are never supported (see above) */ |
| 98 | }; |
| 99 | |
| 100 | struct fs_sysfs_path { /* type named in the FS_IOC_GETFSSYSFSPATH ioctl definition below */ |
| 101 | __u8 len; /* NOTE(review): presumably the number of bytes used in name[] - confirm */ |
| 102 | __u8 name[128]; /* fixed 128-byte buffer; NOTE(review): presumably holds the returned path component - confirm */ |
| 103 | }; |
| 104 | |
| 105 | /* Protection info capability flags (NOTE(review): presumably reported in lbmd_flags - confirm) */ |
| 106 | #define LBMD_PI_CAP_INTEGRITY (1 << 0) |
| 107 | #define LBMD_PI_CAP_REFTAG (1 << 1) |
| 108 | |
| 109 | /* Checksum types for Protection Information (NOTE(review): presumably the values of lbmd_guard_tag_type - confirm) */ |
| 110 | #define LBMD_PI_CSUM_NONE 0 |
| 111 | #define LBMD_PI_CSUM_IP 1 |
| 112 | #define LBMD_PI_CSUM_CRC16_T10DIF 2 |
| 113 | #define LBMD_PI_CSUM_CRC64_NVME 4 /* note: 4, not 3; the value 3 is not defined here */ |
| 114 | |
| 115 | /* sizeof first published struct; the fields of struct logical_block_metadata_cap below sum to 4 + 2 + 10 * 1 = 16 */ |
| 116 | #define LBMD_SIZE_VER0 16 |
| 117 | |
| 118 | /* |
| 119 | * Logical block metadata capability descriptor |
| 120 | * If the device does not support metadata, all the fields will be zero. |
| 121 | * Applications must check lbmd_flags to determine whether metadata is |
| 122 | * supported or not. |
| 123 | */ |
| 124 | struct logical_block_metadata_cap { /* type named in the FS_IOC_GETLBMD_CAP ioctl definition below */ |
| 125 | /* Bitmask of logical block metadata capability flags */ |
| 126 | __u32 lbmd_flags; |
| 127 | /* |
| 128 | * The amount of data described by each unit of logical block |
| 129 | * metadata |
| 130 | */ |
| 131 | __u16 lbmd_interval; |
| 132 | /* |
| 133 | * Size in bytes of the logical block metadata associated with each |
| 134 | * interval |
| 135 | */ |
| 136 | __u8 lbmd_size; |
| 137 | /* |
| 138 | * Size in bytes of the opaque block tag associated with each |
| 139 | * interval |
| 140 | */ |
| 141 | __u8 lbmd_opaque_size; |
| 142 | /* |
| 143 | * Offset in bytes of the opaque block tag within the logical block |
| 144 | * metadata |
| 145 | */ |
| 146 | __u8 lbmd_opaque_offset; |
| 147 | /* Size in bytes of the T10 PI tuple associated with each interval */ |
| 148 | __u8 lbmd_pi_size; |
| 149 | /* Offset in bytes of T10 PI tuple within the logical block metadata */ |
| 150 | __u8 lbmd_pi_offset; |
| 151 | /* T10 PI guard tag type */ |
| 152 | __u8 lbmd_guard_tag_type; |
| 153 | /* Size in bytes of the T10 PI application tag */ |
| 154 | __u8 lbmd_app_tag_size; |
| 155 | /* Size in bytes of the T10 PI reference tag */ |
| 156 | __u8 lbmd_ref_tag_size; |
| 157 | /* Size in bytes of the T10 PI storage tag */ |
| 158 | __u8 lbmd_storage_tag_size; |
| 159 | __u8 pad; /* explicit pad byte; with it the fields sum to 16 bytes (cf. LBMD_SIZE_VER0) */ |
| 160 | }; |
| 161 | |
| 162 | /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ |
| 163 | #define FILE_DEDUPE_RANGE_SAME 0 /* file_dedupe_range_info.status value: dedupe succeeded */ |
| 164 | #define FILE_DEDUPE_RANGE_DIFFERS 1 /* file_dedupe_range_info.status value: data differs */ |
| 165 | |
| 166 | /* from struct btrfs_ioctl_file_extent_same_info */ |
| 167 | struct file_dedupe_range_info { /* element type of file_dedupe_range.info[] */ |
| 168 | __s64 dest_fd; /* in - destination file */ |
| 169 | __u64 dest_offset; /* in - start of extent in destination */ |
| 170 | __u64 bytes_deduped; /* out - total # of bytes we were able |
| 171 | * to dedupe from this file. */ |
| 172 | /* status of this dedupe operation: |
| 173 | * < 0 for error |
| 174 | * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds |
| 175 | * == FILE_DEDUPE_RANGE_DIFFERS if data differs |
| 176 | */ |
| 177 | __s32 status; /* out - see above description */ |
| 178 | __u32 reserved; /* must be zero */ |
| 179 | }; |
| 180 | |
| 181 | /* from struct btrfs_ioctl_file_extent_same_args */ |
| 182 | struct file_dedupe_range { /* type named in the FIDEDUPERANGE ioctl definition below */ |
| 183 | __u64 src_offset; /* in - start of extent in source */ |
| 184 | __u64 src_length; /* in - length of extent */ |
| 185 | __u16 dest_count; /* in - total elements in info array */ |
| 186 | __u16 reserved1; /* must be zero */ |
| 187 | __u32 reserved2; /* must be zero */ |
| 188 | struct file_dedupe_range_info info[]; /* flexible array member; dest_count gives its element count */ |
| 189 | }; |
| 190 | |
| 191 | /* And dynamically-tunable limits and defaults: */ |
| 192 | struct files_stat_struct { /* NOTE(review): presumably system-wide file counters and limit - confirm where this is exported */ |
| 193 | unsigned long nr_files; /* read only */ |
| 194 | unsigned long nr_free_files; /* read only */ |
| 195 | unsigned long max_files; /* tunable */ |
| 196 | }; |
| 197 | |
| 198 | struct inodes_stat_t { /* seven longs in total: two counters plus five padding slots */ |
| 199 | long nr_inodes; /* NOTE(review): presumably the number of inodes in use - confirm */ |
| 200 | long nr_unused; /* NOTE(review): presumably the number of unused inodes - confirm */ |
| 201 | long dummy[5]; /* padding for sysctl ABI compatibility */ |
| 202 | }; |
| 203 | |
| 204 | |
| 205 | #define NR_FILE 8192 /* this can well be larger on a larger system; see the NR_OPEN/NR_FILE note near the top of this file */ |
| 206 | |
| 207 | /* |
| 208 | * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR. |
| 209 | */ |
| 210 | struct fsxattr { /* fields sum to 5 * 4 + 8 = 28 bytes */ |
| 211 | __u32 fsx_xflags; /* xflags field value (get/set) */ |
| 212 | __u32 fsx_extsize; /* extsize field value (get/set)*/ |
| 213 | __u32 fsx_nextents; /* nextents field value (get) */ |
| 214 | __u32 fsx_projid; /* project identifier (get/set) */ |
| 215 | __u32 fsx_cowextsize; /* CoW extsize field value (get/set)*/ |
| 216 | unsigned char fsx_pad[8]; /* padding; per the inode-flags note further down it leaves this structure room to grow */ |
| 217 | }; |
| 218 | |
| 219 | /* |
| 220 | * Variable size structure for file_[sg]et_attr(). |
| 221 | * |
| 222 | * Note: this is an alternative to the structure 'struct file_kattr'/'struct fsxattr'. |
| 223 | * As this structure is passed to/from userspace with its size, this can |
| 224 | * be versioned based on the size. |
| 225 | */ |
| 226 | struct file_attr { /* fields sum to 8 + 4 * 4 = 24 bytes, the value of FILE_ATTR_SIZE_VER0 */ |
| 227 | __u64 fa_xflags; /* xflags field value (get/set) */ |
| 228 | __u32 fa_extsize; /* extsize field value (get/set)*/ |
| 229 | __u32 fa_nextents; /* nextents field value (get) */ |
| 230 | __u32 fa_projid; /* project identifier (get/set) */ |
| 231 | __u32 fa_cowextsize; /* CoW extsize field value (get/set) */ |
| 232 | }; |
| 233 | |
| 234 | #define FILE_ATTR_SIZE_VER0 24 /* 8 + 4 * 4: the field sizes of struct file_attr above */ |
| 235 | #define FILE_ATTR_SIZE_LATEST FILE_ATTR_SIZE_VER0 /* only one size version is defined so far */ |
| 236 | |
| 237 | /* |
| 238 | * Flags for the fsx_xflags field (no flag with value 0x00000004 is defined in this list) |
| 239 | */ |
| 240 | #define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ |
| 241 | #define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */ |
| 242 | #define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ |
| 243 | #define FS_XFLAG_APPEND 0x00000010 /* all writes append */ |
| 244 | #define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ |
| 245 | #define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */ |
| 246 | #define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ |
| 247 | #define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ |
| 248 | #define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ |
| 249 | #define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ |
| 250 | #define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ |
| 251 | #define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ |
| 252 | #define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ |
| 253 | #define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ |
| 254 | #define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ |
| 255 | #define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ |
| 256 | #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ |
| 257 | |
| 258 | /* the read-only stuff doesn't really belong here, but any other place is |
| 259 | probably as bad and I don't want to create yet another include file. */ |
| 260 | |
| 261 | #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ |
| 262 | #define BLKROGET _IO(0x12,94) /* get read-only status (0 = read-write) */ |
| 263 | #define BLKRRPART _IO(0x12,95) /* re-read partition table */ |
| 264 | #define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ |
| 265 | #define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ |
| 266 | #define BLKRASET _IO(0x12,98) /* set read ahead for block device */ |
| 267 | #define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ |
| 268 | #define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ |
| 269 | #define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ |
| 270 | #define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ |
| 271 | #define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ |
| 272 | #define BLKSSZGET _IO(0x12,104)/* get block device sector size */ |
| 273 | #if 0 /* compiled out: the lines up to the matching #endif only record that these numbers are taken */ |
| 274 | #define BLKPG _IO(0x12,105)/* See blkpg.h */ |
| 275 | |
| 276 | /* Some people are morons. Do not use sizeof! */ |
| 277 | |
| 278 | #define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */ |
| 279 | #define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */ |
| 280 | /* This was here just to show that the number is taken - |
| 281 | probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ |
| 282 | #endif |
| 283 | /* A jump here: 108-111 have been used for various private purposes. */ |
| 284 | #define BLKBSZGET _IOR(0x12,112,size_t) |
| 285 | #define BLKBSZSET _IOW(0x12,113,size_t) |
| 286 | #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg); note the type named in the macro is size_t, not u64 */ |
| 287 | #define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) /* struct blk_user_trace_setup is not defined in this file */ |
| 288 | #define BLKTRACESTART _IO(0x12,116) |
| 289 | #define BLKTRACESTOP _IO(0x12,117) |
| 290 | #define BLKTRACETEARDOWN _IO(0x12,118) |
| 291 | #define BLKDISCARD _IO(0x12,119) |
| 292 | #define BLKIOMIN _IO(0x12,120) |
| 293 | #define BLKIOOPT _IO(0x12,121) |
| 294 | #define BLKALIGNOFF _IO(0x12,122) |
| 295 | #define BLKPBSZGET _IO(0x12,123) |
| 296 | #define BLKDISCARDZEROES _IO(0x12,124) |
| 297 | #define BLKSECDISCARD _IO(0x12,125) |
| 298 | #define BLKROTATIONAL _IO(0x12,126) |
| 299 | #define BLKZEROOUT _IO(0x12,127) |
| 300 | #define BLKGETDISKSEQ _IOR(0x12,128,__u64) |
| 301 | /* 130-136 and 142 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */ |
| 302 | /* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */ |
| 303 | #define BLKTRACESETUP2 _IOWR(0x12, 142, struct blk_user_trace_setup2) /* NOTE(review): 142 is also listed above as used by the zoned block device ioctls - confirm the overlap is intended */ |
| 304 | |
| 305 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ |
| 306 | #define FIBMAP _IO(0x00,1) /* bmap access */ |
| 307 | #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ |
| 308 | #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ |
| 309 | #define FITHAW _IOWR('X', 120, int) /* Thaw */ |
| 310 | #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ |
| 311 | #define FICLONE _IOW(0x94, 9, int) /* NOTE(review): the int is presumably a source fd - confirm */ |
| 312 | #define FICLONERANGE _IOW(0x94, 13, struct file_clone_range) /* struct file_clone_range is defined above */ |
| 313 | #define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range) /* struct file_dedupe_range (with its trailing info[] array) is defined above */ |
| 314 | |
| 315 | #define FSLABEL_MAX 256 /* Max chars for the interface; each fs may differ */ |
| 316 | |
| 317 | #define FS_IOC_GETFLAGS _IOR('f', 1, long) |
| 318 | #define FS_IOC_SETFLAGS _IOW('f', 2, long) |
| 319 | #define FS_IOC_GETVERSION _IOR('v', 1, long) |
| 320 | #define FS_IOC_SETVERSION _IOW('v', 2, long) |
| 321 | #define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) /* struct fiemap is not defined in this file */ |
| 322 | #define FS_IOC32_GETFLAGS _IOR('f', 1, int) /* FS_IOC32_*: same magic and number as the FS_IOC_* counterparts above, declared with int instead of long */ |
| 323 | #define FS_IOC32_SETFLAGS _IOW('f', 2, int) |
| 324 | #define FS_IOC32_GETVERSION _IOR('v', 1, int) |
| 325 | #define FS_IOC32_SETVERSION _IOW('v', 2, int) |
| 326 | #define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr) |
| 327 | #define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr) |
| 328 | #define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX]) /* buffer of FSLABEL_MAX (256) chars */ |
| 329 | #define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX]) /* buffer of FSLABEL_MAX (256) chars */ |
| 330 | /* Returns the external filesystem UUID, the same one blkid returns */ |
| 331 | #define FS_IOC_GETFSUUID _IOR(0x15, 0, struct fsuuid2) |
| 332 | /* |
| 333 | * Returns the path component under /sys/fs/ that refers to this filesystem; |
| 334 | * also /sys/kernel/debug/ for filesystems with debugfs exports |
| 335 | */ |
| 336 | #define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path) |
| 337 | /* Get logical block metadata capability details */ |
| 338 | #define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap) |
| 339 | |
| 340 | /* |
| 341 | * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) |
| 342 | * |
| 343 | * Note: for historical reasons, these flags were originally used and |
| 344 | * defined for use by ext2/ext3, and then other file systems started |
| 345 | * using these flags so they wouldn't need to write their own version |
| 346 | * of chattr/lsattr (which was shipped as part of e2fsprogs). You |
| 347 | * should think twice before trying to use these flags in new |
| 348 | * contexts, or trying to assign these flags, since they are used both |
| 349 | * as the UAPI and the on-disk encoding for ext2/3/4. Also, we are |
| 350 | * almost out of 32-bit flags. :-) |
| 351 | * |
| 352 | * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from |
| 353 | * XFS to the generic FS level interface. This uses a structure that |
| 354 | * has padding and hence has more room to grow, so it may be more |
| 355 | * appropriate for many new use cases. |
| 356 | * |
| 357 | * Please do not change these flags or interfaces before checking with |
| 358 | * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org. |
| 359 | */ |
| 360 | #define FS_SECRM_FL 0x00000001 /* Secure deletion */ |
| 361 | #define FS_UNRM_FL 0x00000002 /* Undelete */ |
| 362 | #define FS_COMPR_FL 0x00000004 /* Compress file */ |
| 363 | #define FS_SYNC_FL 0x00000008 /* Synchronous updates */ |
| 364 | #define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ |
| 365 | #define FS_APPEND_FL 0x00000020 /* writes to file may only append */ |
| 366 | #define FS_NODUMP_FL 0x00000040 /* do not dump file */ |
| 367 | #define FS_NOATIME_FL 0x00000080 /* do not update atime */ |
| 368 | /* Reserved for compression usage... */ |
| 369 | #define FS_DIRTY_FL 0x00000100 |
| 370 | #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ |
| 371 | #define FS_NOCOMP_FL 0x00000400 /* Don't compress */ |
| 372 | /* End compression flags --- maybe not all used */ |
| 373 | #define FS_ENCRYPT_FL 0x00000800 /* Encrypted file */ |
| 374 | #define FS_BTREE_FL 0x00001000 /* btree format dir */ |
| 375 | #define FS_INDEX_FL 0x00001000 /* hash-indexed directory; same bit value as FS_BTREE_FL */ |
| 376 | #define FS_IMAGIC_FL 0x00002000 /* AFS directory */ |
| 377 | #define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ |
| 378 | #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ |
| 379 | #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ |
| 380 | #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
| 381 | #define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */ |
| 382 | #define FS_EXTENT_FL 0x00080000 /* Extents */ |
| 383 | #define FS_VERITY_FL 0x00100000 /* Verity protected inode */ |
| 384 | #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ |
| 385 | #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ |
| 386 | #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ |
| 387 | #define FS_DAX_FL 0x02000000 /* Inode is DAX */ |
| 388 | #define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */ |
| 389 | #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ |
| 390 | #define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */ |
| 391 | #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ |
| 392 | |
| 393 | #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags: every bit up to FS_TOPDIR_FL except FS_IMAGIC_FL (0x2000) */ |
| 394 | #define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags: FS_SECRM_FL..FS_NOATIME_FL plus FS_NOTAIL_FL, FS_DIRSYNC_FL and FS_TOPDIR_FL */ |
| 395 | |
| 396 | |
| 397 | #define SYNC_FILE_RANGE_WAIT_BEFORE 1 /* single-bit flag */ |
| 398 | #define SYNC_FILE_RANGE_WRITE 2 /* single-bit flag */ |
| 399 | #define SYNC_FILE_RANGE_WAIT_AFTER 4 /* single-bit flag */ |
| 400 | #define SYNC_FILE_RANGE_WRITE_AND_WAIT (SYNC_FILE_RANGE_WRITE | \ |
| 401 | SYNC_FILE_RANGE_WAIT_BEFORE | \ |
| 402 | SYNC_FILE_RANGE_WAIT_AFTER) /* all three bits above: 1 | 2 | 4 == 7 */ |
| 403 | |
| 404 | /* |
| 405 | * Flags for preadv2/pwritev2: |
| 406 | */ |
| 407 | |
| 408 | typedef int __bitwise __kernel_rwf_t; /* type that every RWF_* constant below is cast to */ |
| 409 | |
| 410 | /* high priority request, poll if possible */ |
| 411 | #define RWF_HIPRI ((__force __kernel_rwf_t)0x00000001) |
| 412 | |
| 413 | /* per-IO O_DSYNC */ |
| 414 | #define RWF_DSYNC ((__force __kernel_rwf_t)0x00000002) |
| 415 | |
| 416 | /* per-IO O_SYNC */ |
| 417 | #define RWF_SYNC ((__force __kernel_rwf_t)0x00000004) |
| 418 | |
| 419 | /* per-IO, return -EAGAIN if operation would block */ |
| 420 | #define RWF_NOWAIT ((__force __kernel_rwf_t)0x00000008) |
| 421 | |
| 422 | /* per-IO O_APPEND */ |
| 423 | #define RWF_APPEND ((__force __kernel_rwf_t)0x00000010) |
| 424 | |
| 425 | /* per-IO negation of O_APPEND */ |
| 426 | #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) |
| 427 | |
| 428 | /* Atomic Write */ |
| 429 | #define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) |
| 430 | |
| 431 | /* buffered IO that drops the cache after reading or writing data */ |
| 432 | #define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) |
| 433 | |
| 434 | /* prevent pipe and socket writes from raising SIGPIPE */ |
| 435 | #define RWF_NOSIGNAL ((__force __kernel_rwf_t)0x00000100) |
| 436 | |
| 437 | /* mask of flags supported by the kernel */ |
| 438 | #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ |
| 439 | RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ |
| 440 | RWF_DONTCACHE | RWF_NOSIGNAL) /* all nine RWF_* bits defined above (0x001 through 0x100) */ |
| 441 | |
| 442 | #define PROCFS_IOCTL_MAGIC 'f' /* same magic character as FS_IOC_GETFLAGS/FS_IOC_SETFLAGS/FS_IOC_FIEMAP above */ |
| 443 | |
| 444 | /* Pagemap ioctl */ |
| 445 | #define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) |
| 446 | |
| 447 | /* Bitmasks provided in the pm_scan_arg category masks and reported in page_region.categories. */ |
| 448 | #define PAGE_IS_WPALLOWED (1 << 0) |
| 449 | #define PAGE_IS_WRITTEN (1 << 1) |
| 450 | #define PAGE_IS_FILE (1 << 2) |
| 451 | #define PAGE_IS_PRESENT (1 << 3) |
| 452 | #define PAGE_IS_SWAPPED (1 << 4) |
| 453 | #define PAGE_IS_PFNZERO (1 << 5) |
| 454 | #define PAGE_IS_HUGE (1 << 6) |
| 455 | #define PAGE_IS_SOFT_DIRTY (1 << 7) |
| 456 | #define PAGE_IS_GUARD (1 << 8) |
| 457 | |
| 458 | /* |
| 459 | * struct page_region - Page region with flags |
| 460 | * @start: Start of the region |
| 461 | * @end: End of the region (exclusive) |
| 462 | * @categories: PAGE_IS_* category bitmask for the region |
| 463 | */ |
| 464 | struct page_region { /* element type of the output array addressed by pm_scan_arg.vec */ |
| 465 | __u64 start; |
| 466 | __u64 end; /* exclusive, per the description above */ |
| 467 | __u64 categories; |
| 468 | }; |
| 469 | |
| 470 | /* Flags for PAGEMAP_SCAN ioctl (NOTE(review): presumably passed in pm_scan_arg.flags - confirm) */ |
| 471 | #define PM_SCAN_WP_MATCHING (1 << 0) /* Write protect the pages matched. */ |
| 472 | #define PM_SCAN_CHECK_WPASYNC (1 << 1) /* Abort the scan when a non-WP-enabled page is found. */ |
| 473 | |
| 474 | /* |
| 475 | * struct pm_scan_arg - Pagemap ioctl argument |
| 476 | * @size: Size of the structure |
| 477 | * @flags: Flags for the IOCTL |
| 478 | * @start: Starting address of the region |
| 479 | * @end: Ending address of the region |
| 480 | * @walk_end: Address where the scan stopped (written by kernel). |
| 481 | * walk_end == end (address tags cleared) informs that the scan completed on entire range. |
| 482 | * @vec: Address of page_region struct array for output |
| 483 | * @vec_len: Length of the page_region struct array |
| 484 | * @max_pages: Optional limit for number of returned pages (0 = disabled) |
| 485 | * @category_inverted: PAGE_IS_* categories which values match if 0 instead of 1 |
| 486 | * @category_mask: Skip pages for which any category doesn't match |
| 487 | * @category_anyof_mask: Skip pages for which no category matches |
| 488 | * @return_mask: PAGE_IS_* categories that are to be reported in `page_region`s returned |
| 489 | */ |
| 490 | struct pm_scan_arg { /* twelve __u64 fields, 96 bytes in total; type named in the PAGEMAP_SCAN definition above */ |
| 491 | __u64 size; |
| 492 | __u64 flags; |
| 493 | __u64 start; |
| 494 | __u64 end; |
| 495 | __u64 walk_end; /* written by the kernel, per the description above */ |
| 496 | __u64 vec; /* an address carried as a __u64, per the description above */ |
| 497 | __u64 vec_len; |
| 498 | __u64 max_pages; /* 0 disables the limit, per the description above */ |
| 499 | __u64 category_inverted; |
| 500 | __u64 category_mask; |
| 501 | __u64 category_anyof_mask; |
| 502 | __u64 return_mask; |
| 503 | }; |
| 504 | |
| 505 | /* /proc/<pid>/maps ioctl; uses the same magic as PAGEMAP_SCAN with the next number (17) */ |
| 506 | #define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) |
| 507 | |
| 508 | enum procmap_query_flags { /* six distinct single-bit values (0x01 through 0x20) that can be OR-ed together */ |
| 509 | /* |
| 510 | * VMA permission flags. |
| 511 | * |
| 512 | * Can be used as part of procmap_query.query_flags field to look up |
| 513 | * only VMAs satisfying specified subset of permissions. E.g., specifying |
| 514 | * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, |
| 515 | * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only |
| 516 | * return read/write VMAs, though both executable/non-executable and |
| 517 | * private/shared will be ignored. |
| 518 | * |
| 519 | * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags |
| 520 | * field to specify actual VMA permissions. |
| 521 | */ |
| 522 | PROCMAP_QUERY_VMA_READABLE = 0x01, |
| 523 | PROCMAP_QUERY_VMA_WRITABLE = 0x02, |
| 524 | PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, |
| 525 | PROCMAP_QUERY_VMA_SHARED = 0x08, |
| 526 | /* |
| 527 | * Query modifier flags. |
| 528 | * |
| 529 | * By default VMA that covers provided address is returned, or -ENOENT |
| 530 | * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest |
| 531 | * VMA with vma_start > addr will be returned if no covering VMA is |
| 532 | * found. |
| 533 | * |
| 534 | * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that |
| 535 | * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA |
| 536 | * to iterate all VMAs with file backing. |
| 537 | */ |
| 538 | PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, |
| 539 | PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, |
| 540 | }; |
| 541 | |
| 542 | /* |
| 543 | * Input/output argument structure passed into ioctl() call. It can be used |
| 544 | * to query a set of VMAs (Virtual Memory Areas) of a process. |
| 545 | * |
| 546 | * Each field can be one of three kinds, marked in a short comment to the |
| 547 | * right of the field: |
| 548 | * - "in", input argument, user has to provide this value, kernel doesn't modify it; |
| 549 | * - "out", output argument, kernel sets this field with VMA data; |
| 550 | * - "in/out", input and output argument; user provides initial value (used |
| 551 | * to specify maximum allowable buffer size), and kernel sets it to actual |
| 552 | * amount of data written (or zero, if there is no data). |
| 553 | * |
| 554 | * If matching VMA is found (according to criteria specified by |
| 555 | * query_addr/query_flags), all the out fields are filled out, and ioctl() |
| 556 | * returns 0. If there is no matching VMA, -ENOENT will be returned. |
| 557 | * In case of any other error, negative error code other than -ENOENT is |
| 558 | * returned. |
| 559 | * |
| 560 | * Most of the data is similar to the one returned as text in /proc/<pid>/maps |
| 561 | * file, but procmap_query provides more querying flexibility. There are no |
| 562 | * consistency guarantees between subsequent ioctl() calls, but data returned |
| 563 | * for matched VMA is self-consistent. |
| 564 | */ |
| 565 | struct procmap_query { /* type named in the PROCMAP_QUERY ioctl definition above */ |
| 566 | /* Query struct size, for backwards/forward compatibility */ |
| 567 | __u64 size; /* NOTE(review): presumably set by the caller to sizeof(struct procmap_query) - confirm */ |
| 568 | /* |
| 569 | * Query flags, a combination of enum procmap_query_flags values. |
| 570 | * Defines query filtering and behavior, see enum procmap_query_flags. |
| 571 | * |
| 572 | * Input argument, provided by user. Kernel doesn't modify it. |
| 573 | */ |
| 574 | __u64 query_flags; /* in */ |
| 575 | /* |
| 576 | * Query address. By default, VMA that covers this address will |
| 577 | * be looked up. PROCMAP_QUERY_* flags above modify this default |
| 578 | * behavior further. |
| 579 | * |
| 580 | * Input argument, provided by user. Kernel doesn't modify it. |
| 581 | */ |
| 582 | __u64 query_addr; /* in */ |
| 583 | /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ |
| 584 | __u64 vma_start; /* out */ |
| 585 | __u64 vma_end; /* out */ |
| 586 | /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ |
| 587 | __u64 vma_flags; /* out */ |
| 588 | /* VMA backing page size granularity. */ |
| 589 | __u64 vma_page_size; /* out */ |
| 590 | /* |
| 591 | * VMA file offset. If VMA has file backing, this specifies offset |
| 592 | * within the file that VMA's start address corresponds to. |
| 593 | * Is set to zero if VMA has no backing file. |
| 594 | */ |
| 595 | __u64 vma_offset; /* out */ |
| 596 | /* Backing file's inode number, or zero, if VMA has no backing file. */ |
| 597 | __u64 inode; /* out */ |
| 598 | /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ |
| 599 | __u32 dev_major; /* out */ |
| 600 | __u32 dev_minor; /* out */ |
| 601 | /* |
| 602 | * If set to non-zero value, signals the request to return VMA name |
| 603 | * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix |
| 604 | * appended, if file was unlinked from FS) for matched VMA. VMA name |
| 605 | * can also be some special name (e.g., "[heap]", "[stack]") or could |
| 606 | * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). |
| 607 | * |
| 608 | * Kernel will set this field to zero, if VMA has no associated name. |
| 609 | * Otherwise kernel will return actual amount of bytes filled in |
| 610 | * user-supplied buffer (see vma_name_addr field below), including the |
| 611 | * terminating zero. |
| 612 | * |
| 613 | * If VMA name is longer than user-supplied maximum buffer size, |
| 614 | * -E2BIG error is returned. |
| 615 | * |
| 616 | * If this field is set to non-zero value, vma_name_addr should point |
| 617 | * to valid user space memory buffer of at least vma_name_size bytes. |
| 618 | * If set to zero, vma_name_addr should be set to zero as well. |
| 619 | */ |
| 620 | __u32 vma_name_size; /* in/out */ |
| 621 | /* |
| 622 | * If set to non-zero value, signals the request to extract and return |
| 623 | * VMA's backing file's build ID, if the backing file is an ELF file |
| 624 | * and it contains embedded build ID. |
| 625 | * |
| 626 | * Kernel will set this field to zero, if VMA has no backing file, |
| 627 | * backing file is not an ELF file, or ELF file has no build ID |
| 628 | * embedded. |
| 629 | * |
| 630 | * Build ID is a binary value (not a string). Kernel will set |
| 631 | * build_id_size field to exact number of bytes used for build ID. |
| 632 | * If build ID is requested and present, but needs more bytes than |
| 633 | * user-supplied maximum buffer size (see build_id_addr field below), |
| 634 | * -E2BIG error will be returned. |
| 635 | * |
| 636 | * If this field is set to non-zero value, build_id_addr should point |
| 637 | * to valid user space memory buffer of at least build_id_size bytes. |
| 638 | * If set to zero, build_id_addr should be set to zero as well. |
| 639 | */ |
| 640 | __u32 build_id_size; /* in/out */ |
| 641 | /* |
| 642 | * User-supplied address of a buffer of at least vma_name_size bytes |
| 643 | * for kernel to fill with matched VMA's name (see vma_name_size field |
| 644 | * description above for details). |
| 645 | * |
| 646 | * Should be set to zero if VMA name should not be returned. |
| 647 | */ |
| 648 | __u64 vma_name_addr; /* in */ |
| 649 | /* |
| 650 | * User-supplied address of a buffer of at least build_id_size bytes |
| 651 | * for kernel to fill with matched VMA's ELF build ID, if available |
| 652 | * (see build_id_size field description above for details). |
| 653 | * |
| 654 | * Should be set to zero if build ID should not be returned. |
| 655 | */ |
| 656 | __u64 build_id_addr; /* in */ |
| 657 | }; |
| 658 | |
| 659 | #endif /* _UAPI_LINUX_FS_H */ |
| 660 | |