FS
Mass-Storage Structure
disk
:掉电保存,永久存储
memory
:掉电即无
RAID
:Redundant Array of Independent Disks
提高可靠性,冗余磁盘来降低出错的概率
-
Data Mirroring
数据镜像
-
Data Striping
并行读取文件
-
Error Correcting Code (ECC)
校错
RAID Levels:
-
RAID 0:将数据切分(striping)成多个block,分散存储到多个磁盘上
只提高了性能,但没有提高可靠性
-
RAID 1:将所有的文件都存储两份到所有磁盘上
优点:提高了可靠性
缺点:浪费空间,所有的文件都有备份
-
RAID 2:将文件以bit-level拆开,使用海明码纠错
海明码:4 bit data + 3 bit parity,使用7个磁盘
缺点:bit-level 读
-
RAID 3:同样是bit-level拆开,使用xor纠错
优点:可以加快读的速度
缺点:恢复的时间比较长
-
RAID 4,5,6:
- 4:使用block作为切分,整体计算校验码
- 5:使校验码均匀分布在所有磁盘中
- 6:与5相同,但扩展了额外的校验块
I/O Management
-
polling:轮询
CPU主动访问
-
interrupt:中断
CPU被访问
Hardware support:
- I/O instruction
- register
- memory-support
Everything is file.
// /dev/ptmx
/* File operations table used for the /dev/ptmx character device; populated in unix98_pty_init(). */
static struct file_operations ptmx_fops __ro_after_init;
/*
 * unix98_pty_init() - set up the Unix98 pty master/slave tty drivers and
 * the /dev/ptmx character device.
 *
 * Steps, in order:
 *   1. allocate the master (ptm) and slave (pts) tty drivers;
 *   2. fill in their identity and initial termios, and cross-link them
 *      through ->other;
 *   3. register both drivers with the tty layer;
 *   4. build ptmx_fops, register the char device and create the "ptmx"
 *      device node.
 *
 * Every failure path calls panic(); the function returns nothing.
 */
static void __init unix98_pty_init(void)
{
/* Master-side driver: allocated with NR_UNIX98_PTY_MAX and the flag set below. */
ptm_driver = tty_alloc_driver(NR_UNIX98_PTY_MAX,
TTY_DRIVER_RESET_TERMIOS |
TTY_DRIVER_REAL_RAW |
TTY_DRIVER_DYNAMIC_DEV |
TTY_DRIVER_DEVPTS_MEM |
TTY_DRIVER_DYNAMIC_ALLOC);
if (IS_ERR(ptm_driver))
panic("Couldn't allocate Unix98 ptm driver");
/* Slave-side driver: allocated with exactly the same arguments as the master. */
pts_driver = tty_alloc_driver(NR_UNIX98_PTY_MAX,
TTY_DRIVER_RESET_TERMIOS |
TTY_DRIVER_REAL_RAW |
TTY_DRIVER_DYNAMIC_DEV |
TTY_DRIVER_DEVPTS_MEM |
TTY_DRIVER_DYNAMIC_ALLOC);
if (IS_ERR(pts_driver))
panic("Couldn't allocate Unix98 pts driver");
/* Master driver identity. */
ptm_driver->driver_name = "pty_master";
ptm_driver->name = "ptm";
ptm_driver->major = UNIX98_PTY_MASTER_MAJOR;
ptm_driver->minor_start = 0;
ptm_driver->type = TTY_DRIVER_TYPE_PTY;
ptm_driver->subtype = PTY_TYPE_MASTER;
/*
 * Master termios: start from tty_std_termios, then zero the input,
 * output and local flag words and set the control flags and speeds.
 */
ptm_driver->init_termios = tty_std_termios;
ptm_driver->init_termios.c_iflag = 0;
ptm_driver->init_termios.c_oflag = 0;
ptm_driver->init_termios.c_cflag = B38400 | CS8 | CREAD;
ptm_driver->init_termios.c_lflag = 0;
ptm_driver->init_termios.c_ispeed = 38400;
ptm_driver->init_termios.c_ospeed = 38400;
/* Link the master to its slave counterpart and install the master ops. */
ptm_driver->other = pts_driver;
tty_set_operations(ptm_driver, &ptm_unix98_ops);
/* Slave driver identity. */
pts_driver->driver_name = "pty_slave";
pts_driver->name = "pts";
pts_driver->major = UNIX98_PTY_SLAVE_MAJOR;
pts_driver->minor_start = 0;
pts_driver->type = TTY_DRIVER_TYPE_PTY;
pts_driver->subtype = PTY_TYPE_SLAVE;
/*
 * Slave termios: unlike the master, c_iflag/c_oflag/c_lflag keep the
 * values from tty_std_termios; only c_cflag and the speeds are overridden.
 */
pts_driver->init_termios = tty_std_termios;
pts_driver->init_termios.c_cflag = B38400 | CS8 | CREAD;
pts_driver->init_termios.c_ispeed = 38400;
pts_driver->init_termios.c_ospeed = 38400;
/* Link the slave back to the master and install the slave ops. */
pts_driver->other = ptm_driver;
tty_set_operations(pts_driver, &pty_unix98_ops);
/* Register both drivers; a non-zero return from either is fatal. */
if (tty_register_driver(ptm_driver))
panic("Couldn't register Unix98 ptm driver");
if (tty_register_driver(pts_driver))
panic("Couldn't register Unix98 pts driver");
/* Now create the /dev/ptmx special device */
/* Start from the default tty file operations, then override only ->open. */
tty_default_fops(&ptmx_fops);
ptmx_fops.open = ptmx_open;
cdev_init(&ptmx_cdev, &ptmx_fops);
/*
 * Add the cdev and reserve device number (TTYAUX_MAJOR, 2). Because of
 * the short-circuit ||, register_chrdev_region() is only called when
 * cdev_add() returned 0; either failing leads to panic().
 */
if (cdev_add(&ptmx_cdev, MKDEV(TTYAUX_MAJOR, 2), 1) ||
register_chrdev_region(MKDEV(TTYAUX_MAJOR, 2), 1, "/dev/ptmx") < 0)
panic("Couldn't register /dev/ptmx driver");
/* Create the "ptmx" device in tty_class; the return value is not checked here. */
device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx");
}
//file_operation
/*
 * struct file_operations - table of function pointers attached to a file.
 *
 * Each member is a callback for one kind of operation on a file. A driver
 * or file system fills in the members it supports (for example, the
 * /dev/ptmx setup assigns ->open), so that ordinary file reads and writes
 * end up operating the corresponding device.
 */
struct file_operations {
/* NOTE(review): presumably the module that provides these callbacks; confirm. */
struct module *owner;
/* Seek: takes the file, an offset and an int selector; returns a loff_t. */
loff_t (*llseek) (struct file *, loff_t, int);
/* Read/write with a user-space buffer, a byte count and a position pointer. */
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
/* Read/write variants driven by a kiocb and an iov_iter instead of a flat buffer. */
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
unsigned int flags);
/* Directory iteration callbacks; both receive a dir_context. */
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
/* ioctl entry points: file, an unsigned int command and an unsigned long argument. */
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
/* Memory mapping of the file into the given vm_area_struct. */
int (*mmap) (struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
/* Open/flush/release; open and release also receive the inode. */
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
/* fsync takes two loff_t values and a datasync flag (presumably a byte range; confirm). */
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
int (*fasync) (int, struct file *, int);
/* File locking callbacks operating on a struct file_lock. */
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock) (struct file *, int, struct file_lock *);
/* Splice callbacks moving data between a file and a pipe_inode_info. */
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
/* Emits per-file information into a seq_file. */
void (*show_fdinfo)(struct seq_file *m, struct file *f);
/* Only present when the kernel is built without CONFIG_MMU. */
#ifndef CONFIG_MMU
unsigned (*mmap_capabilities)(struct file *);
#endif
/* File-to-file range operations: both take an input file/position and an output file/position. */
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
loff_t, size_t, unsigned int);
loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
/* io_uring command callbacks. */
int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
unsigned int poll_flags);
} __randomize_layout;
使用文件的读写,来操作对应的硬件
在上述的函数中,将对应的IO
设备和文件指针绑定起来,作为initialization
使用不同的函数指针,来针对不同的IO
设备做处理
ioctl:
File system
使用文件系统作存储和IO
的抽象
File: a contiguous logical space for storing information
-
proc file system:
在磁盘上没有对应的文件,称为in memory file system
内存中文件系统,保存了程序的运行信息
文件的组成:
-
name
-
identifier
-
type
在
win
中,使用文件扩展名来判断在
Linux
中使用magic number
来区分文件类型,没有后缀名一说 -
location
-
size
-
protection
-
structure
有的没有具体的结构,有的会有
database
的结构-
directory structure:
文件夹,包含文件信息的目录集合可以
delete, list
-
-
time, date, and user identification
文件操作:
-
create
-
open:将文件的元信息载入到内存中
在打开时,会涉及到共享内存的问题
-
read/write:需要维护一个pointer
-
seek
-
close
-
delete
-
truncate
访问方式:
sequential access
direct access
单级文件系统 -> 二级文件系统 -> 树形文件系统 -> 无环图
所以就有了:
absolute path
relative path
无环图:添加了回指的指针,并且会检测每次添加是否有一个环产生
挂载:将文件系统载入到内存中,让file_operation
指向正确的函数
mount point:
挂载点
文件共享:uid, gid
remote file sharing:FTP
文件保护(访问保护):
-
ACL(Access control list)
$ chmod xxx u(user) g(group) o(other)
FS implementation
-
File system structure:
U盘使用的是
FAT
文件系统,linux
默认是Ext 2/4
存储在硬盘上,为程序提供了接口
-
File system layers:
app --> logic --> file organization ---> basic file system ---> IO control ---> device
-
logical file system:
meta-data
directory
File control block:FCB
name
permissions
dates
owner, group, ACL
size
input
output
-
file organization module
输入逻辑块,输出物理块
-
basic file system(blocks)
buffers, caches
-
IO control
-
device
-
-
on disk structure:永久性的断电不丢的
-
volume control block
包括了文件系统的
meta-data
-
directory
-
per-file File Control Block
in memory structure:断电丢失的
-
mount table
-
directory cache
-
global open-file table
-
per-process open-file table
fid:文件在该表中的索引
针对文件操作:
-
create:
创建一个新的
FCB
-
open:
首先在
global
中搜索是否已经打开-
已打开:创建接口
-
未打开:将
FCB
载入到内存中,并写入到per process open-file table和global open-file table
并对打开文件进行计数
-
-
close
关闭所有的打开文件后,关闭该文件入口
UFS
:使用inode来唯一指定文件
文件系统的挂载:
boot loader
volume control table -- memory --> mount table
virtual file system:
提供了类似于面向对象的实现
面向上层:提供统一的接口
面向下层:为不同的文件系统,使用不同的函数指针
super block
inode
dentry
file
目录的实现:
存储了以inode
为索引的目录项
- 线性链表
- 哈希表
物理存储空间的分配:
-
连续分配
external fragmentation
-
链表式分配
每次都要做一次链表式的查找
FAT-File allocation table
-
索引式分配
为每个文件提供一个索引表,顺序遍历索引表
表的大小限制了文件的大小:将该表链起来,依次来扩容
多级索引表:类似
page table
的实现
128 pointers:
125 data pointers + 1 single indirect + 1 double indirect + 1 triple indirect
管理free-space
-
bit map
-
linked free space
不能很轻易地拿到全部的
block
-
grouping and counting
提升file system
的性能:
- 将
data
和metadata
放到一起 cache
asynchronous writes
free-behind and read-ahead
reads are frequently slower than writes
使用page(file) cache
文件恢复:
-
back up
-
LSFS:Log Structured File Systems
将操作,先记到
log
中,之后再进行具体操作
In practice
external name:面向用户
internal name:面向机器
directory: external name <---> internal name
fd: file descriptor
-
hard link: a directory entry
无
inode
-
soft link: a file
有
inode
soft link
会更灵活