/* vi: set sw=4 ts=4: */
/*
 * mkfs_ext2: utility to create EXT2 filesystem
 * inspired by genext2fs
 *
 * Busybox'ed (2009) by Vladimir Dronnikov <dronnikov@gmail.com>
 *
 * Licensed under GPLv2, see file LICENSE in this tarball for details.
 */
12 #include <linux/ext2_fs.h>
#include <sys/user.h> /* PAGE_SIZE */
/* Not every libc defines PAGE_SIZE in <sys/user.h>; fall back to 4 KiB
 * only when it is missing, so an existing definition is never redefined. */
#ifndef PAGE_SIZE
# define PAGE_SIZE 4096
#endif
17 #include "volume_id/volume_id_internal.h"
19 #define ENABLE_FEATURE_MKFS_EXT2_RESERVED_GDT 0
20 #define ENABLE_FEATURE_MKFS_EXT2_DIR_INDEX 1
23 #define s_reserved_gdt_blocks s_padding1
24 #define s_mkfs_time s_reserved[0]
25 #define s_flags s_reserved[22]
26 #define EXT2_HASH_HALF_MD4 1
27 #define EXT2_FLAGS_SIGNED_HASH 0x0001
29 // whiteout: for writable overlays
30 //#define LINUX_S_IFWHT 0160000
31 //#define EXT2_FEATURE_INCOMPAT_WHITEOUT 0x0020
/*
 * Only declared here; no definition is visible in this file. Reaching it
 * means a macro below was applied to a field of unsupported width.
 * NOTE(review): presumably left undefined on purpose so that misuse fails
 * at link time -- confirm.
 */
char BUG_wrong_field_size(void);

/*
 * STORE_LE(field, value): assign value to an on-disk structure field in
 * little-endian byte order, choosing the conversion by the field's size
 * (4, 2 or 1 bytes). Wrapped in do { } while (0) so that it behaves as a
 * single statement; `field` is expanded several times, so it must not
 * have side effects.
 */
#define STORE_LE(field, value) \
do { \
	if (sizeof(field) == 4) \
		field = cpu_to_le32(value); \
	else if (sizeof(field) == 2) \
		field = cpu_to_le16(value); \
	else if (sizeof(field) == 1) \
		field = (value); \
	else \
		BUG_wrong_field_size(); \
} while (0)

/*
 * FETCH_LE32(field): read back a 32-bit little-endian field as a host
 * value. The byte swap is its own inverse, hence cpu_to_le32() is reused.
 */
#define FETCH_LE32(field) \
	(sizeof(field) == 4 ? cpu_to_le32(field) : BUG_wrong_field_size())
50 // All fields are little-endian
/*
 * Integer base-2 logarithm, rounded down: the number of times arg can be
 * halved before it reaches zero, minus one. Returns 0 for arg == 0 and
 * for arg == 1.
 */
static unsigned int_log2(unsigned arg)
{
	unsigned r = 0;

	while ((arg >>= 1) != 0)
		r++;
	return r;
}
// taken from mkfs_minix.c. libbb candidate?
// why "uint64_t size"? we never use it for anything >32 bits
/*
 * Divide size by n, rounding any remainder up to the next integer.
 * The computation is done in 64 bits; the result is truncated to 32 bits,
 * so the caller must make sure the quotient fits. n must be non-zero.
 */
static uint32_t div_roundup(uint64_t size, uint32_t n)
{
	uint64_t res = size / n;

	if (size % n != 0)
		res++;
	return (uint32_t)res;
}
/*
 * Build an allocation bitmap of blocksize bytes (8 * blocksize bits):
 * the first `start` bits and the last `end` bits are set (in use),
 * everything in between is cleared (free). Bit 0 of byte 0 is the first
 * item of the group.
 *
 * If start or end alone covers the whole bitmap, every bit is set and no
 * byte outside bitmap[0 .. blocksize-1] is touched.
 */
static void allocate(uint8_t *bitmap, uint32_t blocksize, uint32_t start, uint32_t end)
{
	uint32_t i;

//bb_info_msg("ALLOC: [%u][%u][%u]: [%u-%u]:=[%x],[%x]", blocksize, start, end, start/8, blocksize - end/8 - 1, (1 << (start & 7)) - 1, (uint8_t)(0xFF00 >> (end & 7)));
	memset(bitmap, 0, blocksize);

	// leading run: whole bytes first, then the partial byte
	i = start / 8;
	if (i >= blocksize) {
		// whole bitmap is taken; bitmap[i] would be out of bounds
		memset(bitmap, 0xFF, blocksize);
		return;
	}
	memset(bitmap, 0xFF, i);
	bitmap[i] = (1 << (start & 7)) - 1; //0..7 => 00000000..01111111

	// trailing run: partial byte first, then whole bytes up to the end
	i = end / 8;
	if (i >= blocksize) {
		// blocksize - i - 1 would wrap around
		memset(bitmap, 0xFF, blocksize);
		return;
	}
	// |= because the leading and trailing runs may meet in the same byte
	bitmap[blocksize - i - 1] |= 0x7F00 >> (end & 7); //0..7 => 00000000..11111110
	memset(bitmap + blocksize - i, 0xFF, i); // N.B. no overflow here!
}
/*
 * Tell whether block group number x holds a copy of the superblock (and
 * group descriptors): groups 0 and 1 and every group whose number is a
 * power of 3, 5 or 7. Returns 1 if so, 0 otherwise.
 */
static uint32_t has_super(uint32_t x)
{
	// 0, 1 and powers of 3, 5, 7 up to 2^32 limit
	static const uint32_t supers[] = {
		0, 1, 3, 5, 7, 9, 25, 27, 49, 81, 125, 243, 343, 625, 729,
		2187, 2401, 3125, 6561, 15625, 16807, 19683, 59049, 78125,
		117649, 177147, 390625, 531441, 823543, 1594323, 1953125,
		4782969, 5764801, 9765625, 14348907, 40353607, 43046721,
		48828125, 129140163, 244140625, 282475249, 387420489,
		1162261467, 1220703125, 1977326743, 3486784401u/* >2^31 */,
	};
	const uint32_t *sp = supers + sizeof(supers) / sizeof(supers[0]);

	// The table is sorted ascending and starts with 0, so a downward scan
	// always stops at supers[0] at the latest: every x is either equal to
	// or greater than some entry.
	while (1) {
		sp--;
		if (x == *sp)
			return 1;
		if (x > *sp)
			return 0;
	}
}
/* Standard mke2fs 1.41.9:
 * Usage: mke2fs [-c|-l filename] [-b block-size] [-f fragment-size]
 *	[-i bytes-per-inode] [-I inode-size] [-J journal-options]
 *	[-G meta group size] [-N number-of-inodes]
 *	[-m reserved-blocks-percentage] [-o creator-os]
 *	[-g blocks-per-group] [-L volume-label] [-M last-mounted-directory]
 *	[-O feature[,...]] [-r fs-revision] [-E extended-option[,...]]
 *	[-T fs-type] [-U UUID] [-jnqvFSV] device [blocks-count]
 */
// Option bit masks. Bit N corresponds to the N-th option letter of the
// getopt32() option string "cl:b:f:i:I:J:G:N:m:o:g:L:M:O:r:E:T:U:jnqvFS".
// N.B. options without a comment below are accepted and silently ignored
enum {
	OPT_c = 1 << 0,
	OPT_l = 1 << 1,
	OPT_b = 1 << 2,		// block size, in bytes
	OPT_f = 1 << 3,
	OPT_i = 1 << 4,		// bytes per inode
	OPT_I = 1 << 5,
	OPT_J = 1 << 6,
	OPT_G = 1 << 7,
	OPT_N = 1 << 8,
	OPT_m = 1 << 9,		// percentage of blocks reserved for superuser
	OPT_o = 1 << 10,
	OPT_g = 1 << 11,
	OPT_L = 1 << 12,	// label
	OPT_M = 1 << 13,
	OPT_O = 1 << 14,
	OPT_r = 1 << 15,
	OPT_E = 1 << 16,
	OPT_T = 1 << 17,
	OPT_U = 1 << 18,
	OPT_j = 1 << 19,
	OPT_n = 1 << 20,	// dry run: do not write anything
	OPT_q = 1 << 21,
	OPT_v = 1 << 22,
	OPT_F = 1 << 23,
	OPT_S = 1 << 24,
	//OPT_V = 1 << 25,	// -V version. bbox applets don't support that
};
#define fd 3 /* predefined output descriptor */

/*
 * Write size bytes from buf to the output descriptor at absolute byte
 * offset off. The file position is left just past the written data.
 * NOTE(review): xlseek()/xwrite() are defined outside this file;
 * presumably they terminate the program on failure, which would be why
 * no status is returned here -- confirm.
 */
static void PUT(uint64_t off, void *buf, uint32_t size)
{
//	bb_info_msg("PUT[%llu]:[%u]", off, size);
	xlseek(fd, off, SEEK_SET);
	xwrite(fd, buf, size);
}
171 int mkfs_ext2_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
172 int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
176 unsigned blocksize, blocksize_log2;
177 unsigned reserved_percent = 5;
178 unsigned long long kilobytes;
179 uint32_t nblocks, nblocks_full, nreserved;
181 uint32_t bytes_per_inode;
182 uint32_t first_data_block;
183 uint32_t inodes_per_group;
184 uint32_t gdtsz, itsz;
187 const char *label = "";
189 struct ext2_super_block *sb; // superblock
190 struct ext2_group_desc *gd; // group descriptors
191 struct ext2_inode *inode;
192 struct ext2_dir *dir;
195 opt_complementary = "-1:b+:m+:i+";
196 opts = getopt32(argv, "cl:b:f:i:I:J:G:N:m:o:g:L:M:O:r:E:T:U:jnqvFS",
197 NULL, &bs, NULL, &bpi, NULL, NULL, NULL, NULL,
198 &reserved_percent, NULL, NULL, &label, NULL, NULL, NULL, NULL, NULL, NULL);
199 argv += optind; // argv[0] -- device
201 // check the device is a block device
202 xmove_fd(xopen(argv[0], O_WRONLY), fd);
204 if (!S_ISBLK(st.st_mode) && !(opts & OPT_F))
205 bb_error_msg_and_die("not a block device");
207 // check if it is mounted
208 // N.B. what if we format a file? find_mount_point will return false negative since
209 // it is loop block device which mounted!
210 if (find_mount_point(argv[0], 0))
211 bb_error_msg_and_die("can't format mounted filesystem");
213 // open the device, get size in kbytes
215 kilobytes = xatoull(argv[1]);
217 kilobytes = (uoff_t)xlseek(fd, 0, SEEK_END) / 1024;
220 bytes_per_inode = 16384;
221 if (kilobytes < 512*1024)
222 bytes_per_inode = 4096;
223 if (kilobytes < 3*1024)
224 bytes_per_inode = 8192;
226 bytes_per_inode = bpi;
228 // Determine block size
229 // block size is a multiple of 1024
231 if (kilobytes >= 512*1024) // mke2fs 1.41.9 compat
233 if (EXT2_MAX_BLOCK_SIZE > 4096) {
234 // kilobytes >> 22 == size in 4gigabyte chunks.
235 // if size >= 16k gigs, blocksize must be increased.
236 // Try "mke2fs -F image $((16 * 1024*1024*1024))"
237 while ((kilobytes >> 22) >= blocksize)
242 if (blocksize < EXT2_MIN_BLOCK_SIZE
243 || blocksize > EXT2_MAX_BLOCK_SIZE
244 || (blocksize & (blocksize - 1)) // not power of 2
246 bb_error_msg_and_die("blocksize %u is bad", blocksize);
248 // number of bits in one block, i.e. 8*blocksize
249 #define blocks_per_group (8 * blocksize)
250 first_data_block = (EXT2_MIN_BLOCK_SIZE == blocksize);
251 blocksize_log2 = int_log2(blocksize);
253 // Determine number of blocks
254 kilobytes >>= (blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE);
256 if (nblocks != kilobytes)
257 bb_error_msg_and_die("block count doesn't fit in 32 bits");
258 #define kilobytes kilobytes_unused_after_this
259 if (blocksize < PAGE_SIZE)
260 nblocks &= ~((PAGE_SIZE >> blocksize_log2)-1);
261 // Experimentally, standard mke2fs won't work on images smaller than 60k
263 bb_error_msg_and_die("need >= 60 blocks");
265 // How many reserved blocks?
266 if (reserved_percent > 50)
267 bb_error_msg_and_die("-%c is bad", 'm');
268 //nreserved = div_roundup((uint64_t) nblocks * reserved_percent, 100);
269 nreserved = (uint64_t)nblocks * reserved_percent / 100;
271 // N.B. killing e2fsprogs feature! Unused blocks don't account in calculations
272 nblocks_full = nblocks;
274 // If last block group is too small, nblocks may be decreased in order
275 // to discard it, and control returns here to recalculate some
277 // Note: blocksize and bytes_per_inode are never recalculated.
279 // N.B. a block group can have no more than blocks_per_group blocks
280 ngroups = div_roundup(nblocks - first_data_block, blocks_per_group);
282 bb_error_msg_and_die("ngroups");
284 gdtsz = div_roundup(ngroups, blocksize / sizeof(*gd));
285 // TODO: reserved blocks must be marked as such in the bitmaps,
286 // or resulting filesystem is corrupt
287 if (ENABLE_FEATURE_MKFS_EXT2_RESERVED_GDT) {
289 * From e2fsprogs: Calculate the number of GDT blocks to reserve for online
291 * The absolute maximum number of GDT blocks we can reserve is determined by
292 * the number of block pointers that can fit into a single block.
293 * We set it at 1024x the current filesystem size, or
294 * the upper block count limit (2^32), whichever is lower.
296 uint32_t rgdtsz = 0xFFFFFFFF; // maximum block number
297 if (nblocks < rgdtsz / 1024)
298 rgdtsz = nblocks * 1024;
299 rgdtsz = div_roundup(rgdtsz - first_data_block, blocks_per_group);
300 rgdtsz = div_roundup(rgdtsz, blocksize / sizeof(*gd)) - gdtsz;
301 if (rgdtsz > blocksize / sizeof(uint32_t))
302 rgdtsz = blocksize / sizeof(uint32_t);
303 //TODO: STORE_LE(sb->s_reserved_gdt_blocks, rgdtsz);
308 // N.B. e2fsprogs does as follows!
309 // ninodes is the total number of inodes (files) in the file system
310 uint32_t ninodes = nblocks_full / (blocksize >= 4096 ? 1 : 4096 / blocksize);
311 uint32_t overhead, remainder;
312 if (ninodes < EXT2_GOOD_OLD_FIRST_INO+1)
313 ninodes = EXT2_GOOD_OLD_FIRST_INO+1;
314 inodes_per_group = div_roundup(ninodes, ngroups);
315 // minimum number because the first EXT2_GOOD_OLD_FIRST_INO-1 are reserved
316 if (inodes_per_group < 16)
317 inodes_per_group = 16;
319 // a block group can have no more than 8*blocksize inodes
320 if (inodes_per_group > blocks_per_group)
321 inodes_per_group = blocks_per_group;
322 // adjust inodes per group so they completely fill the inode table blocks in the descriptor
323 inodes_per_group = (div_roundup(inodes_per_group * sizeof(*inode), blocksize) * blocksize) / sizeof(*inode);
324 // make sure the number of inodes per group is a multiple of 8
325 inodes_per_group &= ~7;
326 itsz = div_roundup(inodes_per_group * sizeof(*inode), blocksize);
328 // the last group needs more attention: isn't it too small for possible overhead?
329 overhead = (has_super(ngroups - 1) ? (1/*sb*/ + gdtsz) : 0) + 1/*bbmp*/ + 1/*ibmp*/ + itsz;
330 remainder = (nblocks - first_data_block) % blocks_per_group;
331 if ((1 == ngroups) && remainder && (remainder < overhead))
332 bb_error_msg_and_die("way small device");
333 if (remainder && (remainder < overhead + 50/* e2fsprogs hardcoded */)) {
334 //bb_info_msg("CHOP[%u]", remainder);
335 nblocks -= remainder;
341 if (nblocks_full - nblocks)
342 printf("warning: %u blocks unused\n\n", nblocks_full - nblocks);
344 "Filesystem label=%s\n"
346 "Block size=%u (log=%u)\n"
347 "Fragment size=%u (log=%u)\n"
348 "%u inodes, %u blocks\n"
349 "%u blocks (%u%%) reserved for the super user\n"
350 "First data block=%u\n"
351 "Maximum filesystem blocks=%u\n"
353 "%u blocks per group, %u fragments per group\n"
354 "%u inodes per group"
356 , blocksize, blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE
357 , blocksize, blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE
358 , inodes_per_group * ngroups, nblocks
359 , nreserved, reserved_percent
361 , gdtsz * (blocksize / sizeof(*gd)) * blocks_per_group
363 , blocks_per_group, blocks_per_group
367 const char *fmt = "\nSuperblock backups stored on blocks:\n"
369 pos = first_data_block;
370 for (i = 1; i < ngroups; i++) {
371 pos += blocks_per_group;
373 printf(fmt, (unsigned)pos);
380 // dry run? -> we are done
384 // TODO: 3/5 refuse if mounted
385 // TODO: 4/5 compat options
386 // TODO: 1/5 sanity checks
387 // TODO: 0/5 more verbose error messages
388 // TODO: 4/5 bigendianness: recheck, wait for ARM reporters
389 // TODO: 2/5 reserved GDT: how to mark but not allocate?
390 // TODO: 3/5 dir_index?
392 // fill the superblock
393 sb = xzalloc(blocksize);
394 STORE_LE(sb->s_rev_level, 1); // revision 1 filesystem
395 STORE_LE(sb->s_magic, EXT2_SUPER_MAGIC);
396 STORE_LE(sb->s_inode_size, sizeof(*inode));
397 STORE_LE(sb->s_first_ino, EXT2_GOOD_OLD_FIRST_INO);
398 STORE_LE(sb->s_log_block_size, blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE);
399 STORE_LE(sb->s_log_frag_size, blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE);
400 // first 1024 bytes of the device are for boot record. If block size is 1024 bytes, then
401 // the first block available for data is 1, otherwise 0
402 STORE_LE(sb->s_first_data_block, first_data_block); // 0 or 1
403 // block and inode bitmaps occupy no more than one block, so maximum number of blocks is
404 STORE_LE(sb->s_blocks_per_group, blocks_per_group);
405 STORE_LE(sb->s_frags_per_group, blocks_per_group);
407 STORE_LE(sb->s_blocks_count, nblocks);
408 // reserve blocks for superuser
409 STORE_LE(sb->s_r_blocks_count, nreserved);
411 STORE_LE(sb->s_inodes_per_group, inodes_per_group);
412 STORE_LE(sb->s_inodes_count, inodes_per_group * ngroups);
413 STORE_LE(sb->s_free_inodes_count, inodes_per_group * ngroups - EXT2_GOOD_OLD_FIRST_INO);
415 timestamp = time(NULL);
416 STORE_LE(sb->s_mkfs_time, timestamp);
417 STORE_LE(sb->s_wtime, timestamp);
418 STORE_LE(sb->s_lastcheck, timestamp);
420 STORE_LE(sb->s_state, 1); // TODO: what's 1?
421 STORE_LE(sb->s_creator_os, EXT2_OS_LINUX);
422 STORE_LE(sb->s_checkinterval, 24*60*60 * 180); // 180 days
423 STORE_LE(sb->s_errors, EXT2_ERRORS_DEFAULT);
424 STORE_LE(sb->s_feature_compat, EXT2_FEATURE_COMPAT_SUPP
425 | (EXT2_FEATURE_COMPAT_RESIZE_INO * ENABLE_FEATURE_MKFS_EXT2_RESERVED_GDT)
426 | (EXT2_FEATURE_COMPAT_DIR_INDEX * ENABLE_FEATURE_MKFS_EXT2_DIR_INDEX)
428 // e2fsck from 1.41.9 doesn't like EXT2_FEATURE_INCOMPAT_WHITEOUT
429 STORE_LE(sb->s_feature_incompat, EXT2_FEATURE_INCOMPAT_FILETYPE);// | EXT2_FEATURE_INCOMPAT_WHITEOUT;
430 STORE_LE(sb->s_feature_ro_compat, EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER);
431 STORE_LE(sb->s_flags, EXT2_FLAGS_SIGNED_HASH * ENABLE_FEATURE_MKFS_EXT2_DIR_INDEX);
432 generate_uuid(sb->s_uuid);
433 if (ENABLE_FEATURE_MKFS_EXT2_DIR_INDEX) {
434 STORE_LE(sb->s_def_hash_version, EXT2_HASH_HALF_MD4);
435 generate_uuid((uint8_t *)sb->s_hash_seed);
438 * From e2fsprogs: add "jitter" to the superblock's check interval so that we
439 * don't check all the filesystems at the same time. We use a
440 * kludgy hack of using the UUID to derive a random jitter value.
442 STORE_LE(sb->s_max_mnt_count, EXT2_DFL_MAX_MNT_COUNT
443 + (sb->s_uuid[ARRAY_SIZE(sb->s_uuid)-1] % EXT2_DFL_MAX_MNT_COUNT));
446 safe_strncpy((char *)sb->s_volume_name, label, sizeof(sb->s_volume_name));
448 // fill group descriptors
449 gd = xzalloc(gdtsz * blocksize);
450 buf = xmalloc(blocksize);
451 sb->s_free_blocks_count = 0;
452 for (i = 0, pos = first_data_block, n = nblocks - first_data_block;
454 i++, pos += blocks_per_group, n -= blocks_per_group
456 uint32_t overhead = pos + (has_super(i) ? (1/*sb*/ + gdtsz) : 0);
458 STORE_LE(gd[i].bg_block_bitmap, overhead + 0);
459 STORE_LE(gd[i].bg_inode_bitmap, overhead + 1);
460 STORE_LE(gd[i].bg_inode_table, overhead + 2);
461 overhead = overhead - pos + 1/*bbmp*/ + 1/*ibmp*/ + itsz;
462 gd[i].bg_free_inodes_count = inodes_per_group;
463 //STORE_LE(gd[i].bg_used_dirs_count, 0);
464 // N.B. both root and lost+found dirs are within the first block group, thus +2
467 STORE_LE(gd[i].bg_used_dirs_count, 2);
468 gd[i].bg_free_inodes_count -= EXT2_GOOD_OLD_FIRST_INO;
471 // mark preallocated blocks as allocated
472 fb = (n < blocks_per_group ? n : blocks_per_group) - overhead;
473 //bb_info_msg("ALLOC: [%u][%u][%u]", blocksize, overhead, blocks_per_group - (fb + overhead));
474 allocate(buf, blocksize,
476 blocks_per_group - (fb + overhead)
479 PUT((uint64_t)(FETCH_LE32(gd[i].bg_block_bitmap)) * blocksize, buf, blocksize);
480 STORE_LE(gd[i].bg_free_blocks_count, fb);
482 // mark preallocated inodes as allocated
483 allocate(buf, blocksize,
484 inodes_per_group - gd[i].bg_free_inodes_count,
485 blocks_per_group - inodes_per_group
488 //PUT((uint64_t)(FETCH_LE32(gd[i].bg_block_bitmap)) * blocksize, buf, blocksize);
489 //but it's right after block bitmap, so we can just:
490 xwrite(fd, buf, blocksize);
491 STORE_LE(gd[i].bg_free_inodes_count, gd[i].bg_free_inodes_count);
493 // count overall free blocks
494 sb->s_free_blocks_count += fb;
496 STORE_LE(sb->s_free_blocks_count, sb->s_free_blocks_count);
498 // dump filesystem skeleton structures
499 // printf("Writing superblocks and filesystem accounting information: ");
500 for (i = 0, pos = first_data_block; i < ngroups; i++, pos += blocks_per_group) {
501 // dump superblock and group descriptors and their backups
503 // N.B. 1024 byte blocks are special
504 PUT(((uint64_t)pos * blocksize) + ((0 == i && 0 == first_data_block) ? 1024 : 0), sb, 1024);//blocksize);
505 PUT(((uint64_t)pos * blocksize) + blocksize, gd, gdtsz * blocksize);
510 memset(buf, 0, blocksize);
511 PUT(0, buf, 1024); // N.B. 1024 <= blocksize, so buf[0..1023] contains zeros
513 for (i = 0; i < ngroups; ++i)
514 for (n = 0; n < itsz; ++n)
515 PUT((uint64_t)(FETCH_LE32(gd[i].bg_inode_table) + n) * blocksize, buf, blocksize);
517 // prepare directory inode
518 inode = (struct ext2_inode *)buf;
519 STORE_LE(inode->i_mode, S_IFDIR | S_IRWXU | S_IRGRP | S_IROTH | S_IXGRP | S_IXOTH);
520 STORE_LE(inode->i_mtime, timestamp);
521 STORE_LE(inode->i_atime, timestamp);
522 STORE_LE(inode->i_ctime, timestamp);
523 STORE_LE(inode->i_size, blocksize);
524 // N.B. inode->i_blocks stores the number of 512 byte data blocks. Why on Earth?!
525 STORE_LE(inode->i_blocks, blocksize / 512);
527 // dump root dir inode
528 STORE_LE(inode->i_links_count, 3); // "/.", "/..", "/lost+found/.." point to this inode
529 STORE_LE(inode->i_block[0], FETCH_LE32(gd[0].bg_inode_table) + itsz);
530 PUT(((uint64_t)FETCH_LE32(gd[0].bg_inode_table) * blocksize) + (EXT2_ROOT_INO-1) * sizeof(*inode), buf, sizeof(*inode));
532 // dump lost+found dir inode
533 STORE_LE(inode->i_links_count, 2); // both "/lost+found" and "/lost+found/." point to this inode
534 STORE_LE(inode->i_block[0], inode->i_block[0]+1); // use next block
535 PUT(((uint64_t)FETCH_LE32(gd[0].bg_inode_table) * blocksize) + (EXT2_GOOD_OLD_FIRST_INO-1) * sizeof(*inode), buf, sizeof(*inode));
538 memset(buf, 0, blocksize);
539 dir = (struct ext2_dir *)buf;
541 // dump lost+found dir block
542 STORE_LE(dir->inode1, EXT2_GOOD_OLD_FIRST_INO);
543 STORE_LE(dir->rec_len1, 12);
544 STORE_LE(dir->name_len1, 1);
545 STORE_LE(dir->file_type1, EXT2_FT_DIR);
547 STORE_LE(dir->inode2, EXT2_ROOT_INO);
548 STORE_LE(dir->rec_len2, blocksize - 12);
549 STORE_LE(dir->name_len2, 2);
550 STORE_LE(dir->file_type2, EXT2_FT_DIR);
551 dir->name2[0] = '.'; dir->name2[1] = '.';
552 PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + itsz + 1) * blocksize, buf, blocksize);
554 // dump root dir block
555 STORE_LE(dir->inode1, EXT2_ROOT_INO);
556 STORE_LE(dir->rec_len2, 12);
557 STORE_LE(dir->inode3, EXT2_GOOD_OLD_FIRST_INO);
558 STORE_LE(dir->rec_len3, blocksize - 12 - 12);
559 STORE_LE(dir->name_len3, 10);
560 STORE_LE(dir->file_type3, EXT2_FT_DIR);
561 strcpy(dir->name3, "lost+found");
562 PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + itsz + 0) * blocksize, buf, blocksize);
566 if (ENABLE_FEATURE_CLEAN_UP) {