0%

Linux-Lab6-Block Device Drivers

Block Device Drivers

实验目标:

  • 获取有关 Linux 上 I/O 子系统行为的知识
  • 块设备的结构和功能的实践活动
  • 通过解决练习,获得将API用于块设备的基本技能

块设备的特点是随机访问以固定大小的块组织的数据(此类设备的示例包括硬盘驱动器,CD-ROM驱动器,RAM磁盘等)

块设备的速度一般远高于字符设备的速度,它们的性能也很重要(这就是为什么 Linux 内核以不同的方式处理这两种类型的设备),因此,使用块设备比使用字符设备更复杂:

  • 字符设备具有单个当前位置
  • 块设备必须能够移动到设备中的任何位置以提供对数据的随机访问

为了简化块设备的使用,Linux 内核提供了一个称为块 I/O(或块层)子系统的整个子系统:

  • 从内核的角度来看,寻址的最小逻辑单元是块(尽管可以在扇区级别对物理设备进行寻址,但内核使用块执行所有磁盘操作)
  • 由于物理寻址的最小单位是扇区,因此块的大小必须是扇区大小的倍数(块的大小因所使用的文件系统而异,最常见的值是 512B、1KB 和 4KB)

Register a block I/O device

从 Linux 内核 4.9 版开始,对 register_blkdev 的调用是可选的:该函数所做的只是(在需要时)动态分配主设备号(major),并在 /proc/devices 中创建条目(在将来的内核版本中它可能会被移除,不过大多数驱动程序仍然会调用它)

通常,对注册函数的调用在模块初始化函数中执行,对注销函数的调用在模块退出函数中执行,典型方案如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#include <linux/fs.h>

#define MY_BLOCK_MAJOR 240
#define MY_BLKDEV_NAME "mybdev"

/*
 * Module init skeleton: registers the block device major number.
 *
 * Returns 0 on success, or the negative error code reported by
 * register_blkdev() on failure.
 */
static int my_block_init(void)
{
	int status;

	status = register_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
	if (status < 0) {
		printk(KERN_ERR "unable to register mybdev block device\n");
		/* Propagate the real cause instead of always reporting -EBUSY. */
		return status;
	}
	//...

	return 0;
}

/*
 * Module exit skeleton: gives back the major number that was registered
 * under MY_BLKDEV_NAME.
 */
static void my_block_exit(void)
{
	//...
	unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
}

Register a disk

虽然函数 register_blkdev 注册了一个 major,但它不向系统提供设备(TYPE - disk),为了创建和使用块设备,使用 linux/genhd.h 中定义的专用接口:

1
2
3
#define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) /* allocate a block device (gendisk) */
void del_gendisk(struct gendisk *gp); /* remove the given block device */
void add_disk(struct gendisk *disk); /* add the disk to the system */

使用案例如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#include <linux/fs.h>
#include <linux/genhd.h>

#define MY_BLOCK_MINORS 1

/* Driver-private state of the example device; one static instance, dev. */
static struct my_block_dev {
	struct gendisk *gd;	/* disk object returned by alloc_disk() */
	//...
} dev;

static int create_block_device(struct my_block_dev *dev)
{
dev->gd = alloc_disk(MY_BLOCK_MINORS);
//...
add_disk(dev->gd);
}

static void delete_block_device(struct my_block_dev *dev)
{
if (dev->gd)
del_gendisk(dev->gd);
//...
}

/*
 * Module init skeleton: creates the block device.
 *
 * Returns 0 on success or the negative error code from
 * create_block_device().
 */
static int my_block_init(void)
{
	int status;

	//...
	status = create_block_device(&dev);
	if (status < 0)
		return status;

	return 0;
}

/* Module exit skeleton: tears down the disk owned by the static dev. */
static void my_block_exit(void)
{
	delete_block_device(&dev);
	//...
}
  • 在调用函数 add_disk 后(实际上在调用期间),磁盘立即处于活动状态,并且可以随时调用其方法
  • 因此,在驱动程序完全初始化并准备好响应对已注册磁盘的请求之前,不应调用此函数

结构体 gendisk 存储有关磁盘的信息,这样的结构是从调用 alloc_disk 中获得的,在将其发送到函数 add_disk 之前必须填充其字段

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/*
 * Stores information about one disk. Per the surrounding text, an instance
 * is obtained from alloc_disk() and its fields must be filled in before it
 * is passed to add_disk().
 */
struct gendisk {
int major; /* major number of driver */
int first_minor;
int minors;
char disk_name[DISK_NAME_LEN]; /* disk name as shown in sysfs */
unsigned short events; /* supported events */
unsigned short event_flags; /* flags related to event processing */
struct disk_part_tbl __rcu *part_tbl;
struct hd_struct part0;
const struct block_device_operations *fops; /* operations associated with the disk */
struct request_queue *queue; /* the request queue */
void *private_data; /* pointer to private data */
int flags;
unsigned long state;
#define GD_NEED_PART_SCAN 0
struct rw_semaphore lookup_sem;
struct kobject *slave_dir;
struct timer_rand_state *random;
atomic_t sync_io; /* RAID */
struct disk_events *ev;
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct kobject integrity_kobj;
#endif /* CONFIG_BLK_DEV_INTEGRITY */
#if IS_ENABLED(CONFIG_CDROM)
struct cdrom_device_info *cdi;
#endif
int node_id;
struct badblocks *bb;
struct lockdep_map lockdep_map;
};

填充结构体 gendisk 的示例如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#include <linux/genhd.h>
#include <linux/fs.h>
#include <linux/blkdev.h>

#define NR_SECTORS 1024

#define KERNEL_SECTOR_SIZE 512

/* Driver-private state of the example device; one static instance, dev. */
static struct my_block_dev {
	//...
	spinlock_t lock;		/* For mutual exclusion */
	struct request_queue *queue;	/* The device request queue */
	struct gendisk *gd;		/* The gendisk structure */
	//...
} dev;

static int create_block_device(struct my_block_dev *dev)
{
...
/* Initialize the gendisk structure */
dev->gd = alloc_disk(MY_BLOCK_MINORS);
if (!dev->gd) {
printk (KERN_NOTICE "alloc_disk failure\n");
return -ENOMEM;
}

dev->gd->major = MY_BLOCK_MAJOR;
dev->gd->first_minor = 0;
dev->gd->fops = &my_block_ops;
dev->gd->queue = dev->queue;
dev->gd->private_data = dev;
snprintf (dev->gd->disk_name, 32, "myblock");
set_capacity(dev->gd, NR_SECTORS);

add_disk(dev->gd);

return 0;
}

/*
 * Module init skeleton: creates the block device.
 *
 * Returns 0 on success or the negative error code from
 * create_block_device().
 */
static int my_block_init(void)
{
	int status;

	//...
	status = create_block_device(&dev);
	if (status < 0)
		return status;
	//...

	/* An int function must not fall off its end on the success path. */
	return 0;
}

static void delete_block_device(struct my_block_dev *dev)
{
if (dev->gd) {
del_gendisk(dev->gd);
}
//...
}

/* Module exit skeleton: tears down the disk owned by the static dev. */
static void my_block_exit(void)
{
	delete_block_device(&dev);
	//...
}

Request Queues Multi-Queue Block Layer

块设备的驱动程序使用请求队列来存储将要处理的块 I/O 请求:

  • 请求队列由结构体 blk_mq_hw_ctx 表示
  • 请求队列由请求及其关联控制信息的双链表组成
  • 请求通过更高级别的内核代码(例如,文件系统)添加到队列中

块设备驱动程序可以在前一个请求完成之前接受请求,因此,上层需要一种方法来知道请求何时完成,为此,在提交时向每个请求添加一个“标记”(用结构体 blk_mq_tag_set 来描述),并在请求完成后使用完成通知发回

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
 * Describes the tags handed out to submitted requests (see the text above);
 * it is passed to blk_mq_alloc_tag_set() and blk_mq_init_queue().
 */
struct blk_mq_tag_set {
struct blk_mq_queue_map map[HCTX_MAX_TYPES];
unsigned int nr_maps;
const struct blk_mq_ops *ops; /* queue operations */
unsigned int nr_hw_queues; /* number of hardware queues allocated for the device */
unsigned int queue_depth; /* hardware queue size */
unsigned int reserved_tags;
unsigned int cmd_size; /* extra bytes allocated "at the end of the device" (wording of the original note — TODO confirm the exact meaning), for the driver's use if needed */
int numa_node; /* on NUMA systems, index of the node the storage device is attached to */
unsigned int timeout;
unsigned int flags;
void *driver_data; /* driver-private data */
atomic_t active_queues_shared_sbitmap;

struct sbitmap_queue __bitmap_tags;
struct sbitmap_queue __breserved_tags;
struct blk_mq_tags **tags; /* pointer to the array of tag sets */

struct mutex tag_list_lock;
struct list_head tag_list; /* list of request queues using this tag set */
};

相关 API 如下:

1
2
3
4
5
6
7
8
9
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); /* create a request queue */
void blk_cleanup_queue(struct request_queue *); /* clean up a request queue */
int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); /* after the tag set fields are initialised, allocate tags and requests for one or more request queues */
void blk_mq_free_tag_set(struct blk_mq_tag_set *set); /* destroy and free the tag set */

void blk_mq_start_request(struct request *rq); /* called before processing of a request starts; notifies the upper layers */
void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); /* put the request back on the queue */
void blk_mq_end_request(struct request *rq, blk_status_t error); /* finish processing a request and notify the upper layers */
bool blk_rq_is_passthrough(struct request *rq); /* check the request type */

使用案例如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>

/* Driver-private state of the example device; one static instance, dev. */
static struct my_block_dev {
	//...
	struct blk_mq_tag_set tag_set;	/* tag set the queue is created from */
	struct request_queue *queue;	/* queue returned by blk_mq_init_queue() */
	//...
} dev;

static blk_status_t my_block_request(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
struct my_block_dev *dev = q->queuedata;
blk_mq_start_request(rq);
if (blk_rq_is_passthrough(rq)) {
printk (KERN_NOTICE "Skip non-fs request\n");
blk_mq_end_request(rq, BLK_STS_IOERR);
goto out;
}
/* do work */
...
blk_mq_end_request(rq, BLK_STS_OK);
out:
return BLK_STS_OK;
}

/* blk-mq operations of the device: only the request handler is set. */
static struct blk_mq_ops my_queue_ops = {
	.queue_rq	= my_block_request,
};

static int create_block_device(struct my_block_dev *dev)
{
/* Initialize tag set. */
dev->tag_set.ops = &my_queue_ops;
dev->tag_set.nr_hw_queues = 1;
dev->tag_set.queue_depth = 128;
dev->tag_set.numa_node = NUMA_NO_NODE;
dev->tag_set.cmd_size = 0;
dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
err = blk_mq_alloc_tag_set(&dev->tag_set);
if (err) {
goto out_err;
}

/* Allocate queue. */
dev->queue = blk_mq_init_queue(&dev->tag_set);
if (IS_ERR(dev->queue)) {
goto out_blk_init;
}

blk_queue_logical_block_size(dev->queue, KERNEL_SECTOR_SIZE);

/* Assign private data to queue structure. */
dev->queue->queuedata = dev;
//...

out_blk_init:
blk_mq_free_tag_set(&dev->tag_set);
out_err:
return -ENOMEM;
}

/*
 * Module init skeleton: creates the block device.
 *
 * Returns 0 on success or the negative error code from
 * create_block_device().
 */
static int my_block_init(void)
{
	int status;

	//...
	status = create_block_device(&dev);
	if (status < 0)
		return status;
	//...

	/* An int function must not fall off its end on the success path. */
	return 0;
}

static void delete_block_device(struct block_dev *dev)
{
//...
blk_mq_free_tag_set(&dev->tag_set);
blk_cleanup_queue(dev->queue);
}

/* Module exit skeleton: releases the queue resources of the static dev. */
static void my_block_exit(void)
{
	delete_block_device(&dev);
	//...
}

Structure struct bio

Linux Block 层作为 IO 子系统的中间层,为上层提供接口,为下层提供数据;bio 是整个 Block 层中最小的、不可分割的 I/O 单位

结构体 bio 用于描述一次块 I/O 操作及其数据所在的内存片段:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/*
 * Block I/O descriptor. The text above calls it the smallest, indivisible
 * unit of the block layer.
 */
struct bio {
struct bio *bi_next; /* request queue link */
struct gendisk *bi_disk; /* the disk device this bio targets */
unsigned int bi_opf; /* operation and flag bits */

unsigned short bi_flags; /* status, etc and bvec pool number */
unsigned short bi_ioprio;
unsigned short bi_write_hint;
blk_status_t bi_status;
u8 bi_partno;
atomic_t __bi_remaining;

struct bvec_iter bi_iter; /* iterator used to walk the bvecs, i.e. the bio's data area */

bio_end_io_t *bi_end_io;

void *bi_private;
#ifdef CONFIG_BLK_CGROUP
struct blkcg_gq *bi_blkg;
struct bio_issue bi_issue;
#ifdef CONFIG_BLK_CGROUP_IOCOST
u64 bi_iocost_cost;
#endif
#endif

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct bio_crypt_ctx *bi_crypt_context;
#endif

union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
};

unsigned short bi_vcnt; /* how many bio_vec's */
unsigned short bi_max_vecs; /* max bvl_vecs we can hold */

atomic_t __bi_cnt; /* pin count */

struct bio_vec *bi_io_vec; /* the actual vec list */
struct bio_set *bi_pool;
struct bio_vec bi_inline_vecs[];
};

相关 API 如下:

1
struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs); /* allocate a new struct bio; nr_iovecs is presumably the number of bio_vec slots to reserve — confirm against the kernel docs */
  • 调用 bio_alloc 以后,往往需要马上填充 bio 中的条目(尤其是:bi_disk、bi_iter、bi_opf),案例如下:
1
2
3
4
5
6
7
8
/* Allocate a bio, requesting 1 for the nr_iovecs argument. */
struct bio *bio = bio_alloc(GFP_NOIO, 1);
//...
/* Fill in the target disk, the starting sector and the operation. */
bio->bi_disk = bdev->bd_disk;
bio->bi_iter.bi_sector = sector;
bio->bi_opf = REQ_OP_READ;
/* Attach a freshly allocated page as the data buffer of the bio. */
page = alloc_page(GFP_NOIO);
bio_add_page(bio, page, size, offset);
//...

如果想要对 bio 进行操作(增删改查),必须将该结构的支持页面映射到对应的内核地址空间,操作完毕后再把映射解除

  • 对于 mapping/unmapping 映射,请使用 kmap_atomic 和 kunmap_atomic
1
2
#define kmap_atomic(page) /* set up a memory mapping for the physical page `page` */
#define kunmap_atomic(addr) /* remove the memory mapping of virtual address `addr` */

遍历 bio 并输出其关联内容的模板如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/*
 * Forward declaration of the driver's transfer helper. The two walkers that
 * follow call it once per segment with the segment's sector, length, mapped
 * buffer address and data direction.
 * NOTE(review): the callers pass a sector_t for `start` — confirm that
 * size_t is the intended parameter type.
 */
static void my_block_transfer(struct my_block_dev *dev, size_t start,
size_t len, char *buffer, int dir);


/*
 * Walk every segment of @bio, map its page, log it and hand the data to
 * my_block_transfer().
 *
 * Always returns 0.
 */
static int my_xfer_bio(struct my_block_dev *dev, struct bio *bio)
{
	struct bio_vec bvec;
	struct bvec_iter iter;

	/* Do each segment independently. */
	bio_for_each_segment(bvec, bio, iter) {
		sector_t sector = iter.bi_sector;
		char *buffer = kmap_atomic(bvec.bv_page);
		unsigned long offset = bvec.bv_offset;
		size_t len = bvec.bv_len;
		int dir = bio_data_dir(bio);

		/* len is a size_t, so it must be printed with %zu, not %u. */
		printk(KERN_LOG_LEVEL "%s: buf %8p offset %lu len %zu dir %d\n", __func__, buffer, offset, len, dir);
		/* process mapped buffer */
		my_block_transfer(dev, sector, len, buffer + offset, dir);
		kunmap_atomic(buffer);
	}
	return 0;
}

/*
 * Walk every segment of every bio in @req, map its page, log it and hand
 * the data to my_block_transfer().
 *
 * Always returns 0.
 */
static int my_xfer_request(struct my_block_dev *dev, struct request *req)
{
	struct bio_vec bvec;
	struct req_iterator iter;

	/* Do each segment independently. */
	rq_for_each_segment(bvec, req, iter) {
		sector_t sector = iter.iter.bi_sector;
		char *buffer = kmap_atomic(bvec.bv_page);
		unsigned long offset = bvec.bv_offset;
		size_t len = bvec.bv_len;
		/* The bio being visited lives in the request iterator. */
		int dir = bio_data_dir(iter.bio);

		/* len is a size_t, so it must be printed with %zu, not %u. */
		printk(KERN_LOG_LEVEL "%s: buf %8p offset %lu len %zu dir %d\n", __func__, buffer, offset, len, dir);
		/* process mapped buffer */
		my_block_transfer(dev, sector, len, buffer + offset, dir);
		kunmap_atomic(buffer);
	}
	return 0;
}
  • 这两个模板比较固定,可以直接拿出来用

Exercises

要解决练习,您需要执行以下步骤:

  • 从模板准备 skeletons
  • 构建模块
  • 将模块复制到虚拟机
  • 启动 VM 并在 VM 中测试模块
1
2
3
make clean
LABS=block_device_drivers make skels
make build

Test1 完整代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
/*
* SO2 - Block device drivers lab (#7)
* Linux - Exercise #1, #2, #3, #6 (RAM Disk)
*/

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>

#include <linux/genhd.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/blk_types.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/bio.h>
#include <linux/vmalloc.h>

MODULE_DESCRIPTION("Simple RAM Disk");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");


#define KERN_LOG_LEVEL KERN_ALERT

#define MY_BLOCK_MAJOR 240
#define MY_BLKDEV_NAME "mybdev"
#define MY_BLOCK_MINORS 1
#define NR_SECTORS 128

#define KERNEL_SECTOR_SIZE 512

/* TODO 6: use bios for read/write requests */
#define USE_BIO_TRANSFER 0


/* State of the RAM disk; the module uses the single static instance g_dev. */
static struct my_block_dev {
	struct blk_mq_tag_set tag_set;	/* tag set the queue is created from */
	struct request_queue *queue;	/* queue returned by blk_mq_init_queue() */
	struct gendisk *gd;		/* disk returned by alloc_disk() */
	u8 *data;			/* vmalloc()ed backing store */
	size_t size;			/* size of data, in bytes */
} g_dev;

/* open handler of the disk: nothing to prepare, always reports success. */
static int my_block_open(struct block_device *bdev, fmode_t mode)
{
	return 0;
}

/* release handler of the disk: intentionally empty. */
static void my_block_release(struct gendisk *gd, fmode_t mode)
{
}

/* Block device operations installed into the gendisk (gd->fops). */
static const struct block_device_operations my_block_ops = {
	.owner		= THIS_MODULE,
	.open		= my_block_open,
	.release	= my_block_release,
};

/*
 * Copy @len bytes between @buffer and the RAM disk, starting at @sector.
 *
 * @dir: 1 copies from @buffer into the device (write); any other value
 *       copies from the device into @buffer (read).
 *
 * Transfers that would reach past the end of the device are silently
 * dropped.
 */
static void my_block_transfer(struct my_block_dev *dev, sector_t sector,
		unsigned long len, char *buffer, int dir)
{
	size_t offset;

	/*
	 * check for read/write beyond end of block device
	 *
	 * The sector is validated before it is scaled to bytes, and the length
	 * is compared against the space that is left, so neither the
	 * multiplication nor an "offset + len" sum can wrap around and slip
	 * past the check.
	 */
	if (sector > dev->size / KERNEL_SECTOR_SIZE)
		return;
	offset = (size_t)sector * KERNEL_SECTOR_SIZE;
	if (len > dev->size - offset)
		return;

	/* TODO 3: read/write to dev buffer depending on dir */
	if (dir == 1)
		memcpy(dev->data + offset, buffer, len);
	else
		memcpy(buffer, dev->data + offset, len);
}

/* to transfer data using bio structures enable USE_BIO_TRANSFER */
#if USE_BIO_TRANSFER == 1
/*
 * Walk every segment of every bio in @req, map its page, log it and copy
 * the data with my_block_transfer().
 */
static void my_xfer_request(struct my_block_dev *dev, struct request *req)
{
	/* TODO 6: iterate segments */
	struct bio_vec bvec;
	struct req_iterator iter;

	/* TODO 6: copy bio data to device buffer */
	rq_for_each_segment(bvec, req, iter) {
		sector_t sector = iter.iter.bi_sector;
		unsigned long offset = bvec.bv_offset;
		size_t len = bvec.bv_len;
		int dir = bio_data_dir(iter.bio);
		char *buffer = kmap_atomic(bvec.bv_page);

		/* len is a size_t, so it must be printed with %zu, not %u. */
		printk(KERN_LOG_LEVEL "%s: buf %8p offset %lu len %zu dir %d\n", __func__, buffer, offset, len, dir);

		my_block_transfer(dev, sector, len, buffer + offset, dir);
		kunmap_atomic(buffer);
	}
}
#endif

/*
 * queue_rq callback of the RAM disk: handles one request taken from @bd.
 *
 * Passthrough (non-filesystem) requests are completed with BLK_STS_IOERR.
 * Other requests are logged, transferred to/from the device buffer and
 * completed with BLK_STS_OK. The function itself always returns BLK_STS_OK
 * because the request has been ended in both cases.
 */
static blk_status_t my_block_request(struct blk_mq_hw_ctx *hctx,
				     const struct blk_mq_queue_data *bd)
{
	struct request *rq;
	struct my_block_dev *dev = hctx->queue->queuedata;

	/* TODO 2: get pointer to request */
	rq = bd->rq;
	/* TODO 2: start request processing. */
	blk_mq_start_request(rq);
	/* TODO 2: check fs request. Return if passthrough. */
	if (blk_rq_is_passthrough(rq)) {
		printk (KERN_NOTICE "Skip non-fs request\n");
		blk_mq_end_request(rq, BLK_STS_IOERR);
		goto out;
	}
	/* TODO 2: print request information */
	printk(KERN_LOG_LEVEL
		"request received: pos=%llu bytes=%u "
		"cur_bytes=%u dir=%c\n",
		(unsigned long long) blk_rq_pos(rq),
		blk_rq_bytes(rq), blk_rq_cur_bytes(rq),
		rq_data_dir(rq) ? 'W' : 'R');

#if USE_BIO_TRANSFER == 1
	/* TODO 6: process the request by calling my_xfer_request */
	/* The original lacked the terminating ';', breaking this build variant. */
	my_xfer_request(dev, rq);
#else
	/* TODO 3: process the request by calling my_block_transfer */
	my_block_transfer(dev, blk_rq_pos(rq), blk_rq_bytes(rq), bio_data(rq->bio), rq_data_dir(rq));
#endif

	/* TODO 2: end request successfully */
	blk_mq_end_request(rq, BLK_STS_OK);
out:
	return BLK_STS_OK;
}

/* blk-mq operations of the RAM disk: only the request handler is set. */
static struct blk_mq_ops my_queue_ops = {
	.queue_rq	= my_block_request,
};

/*
 * Allocate the backing store, tag set, request queue and gendisk of @dev
 * and register the disk.
 *
 * Returns 0 on success or a negative error code. On failure everything
 * allocated here is released and the matching pointers in @dev are reset
 * to NULL, so a later delete_block_device() does not release them twice.
 */
static int create_block_device(struct my_block_dev *dev)
{
	int err;

	dev->size = NR_SECTORS * KERNEL_SECTOR_SIZE;
	dev->data = vmalloc(dev->size);
	if (dev->data == NULL) {
		printk(KERN_ERR "vmalloc: out of memory\n");
		err = -ENOMEM;
		goto out_vmalloc;
	}

	/* Initialize tag set. */
	dev->tag_set.ops = &my_queue_ops;
	dev->tag_set.nr_hw_queues = 1;
	dev->tag_set.queue_depth = 128;
	dev->tag_set.numa_node = NUMA_NO_NODE;
	dev->tag_set.cmd_size = 0;
	dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	err = blk_mq_alloc_tag_set(&dev->tag_set);
	if (err) {
		printk(KERN_ERR "blk_mq_alloc_tag_set: can't allocate tag set\n");
		goto out_alloc_tag_set;
	}

	/* Allocate queue. */
	dev->queue = blk_mq_init_queue(&dev->tag_set);
	if (IS_ERR(dev->queue)) {
		printk(KERN_ERR "blk_mq_init_queue: out of memory\n");
		err = -ENOMEM;
		/* Never leave an error pointer where cleanup expects NULL. */
		dev->queue = NULL;
		goto out_blk_init;
	}
	blk_queue_logical_block_size(dev->queue, KERNEL_SECTOR_SIZE);
	dev->queue->queuedata = dev;

	/* initialize the gendisk structure */
	dev->gd = alloc_disk(MY_BLOCK_MINORS);
	if (!dev->gd) {
		printk(KERN_ERR "alloc_disk: failure\n");
		err = -ENOMEM;
		goto out_alloc_disk;
	}

	dev->gd->major = MY_BLOCK_MAJOR;
	dev->gd->first_minor = 0;
	dev->gd->fops = &my_block_ops;
	dev->gd->queue = dev->queue;
	dev->gd->private_data = dev;
	snprintf(dev->gd->disk_name, DISK_NAME_LEN, "myblock");
	set_capacity(dev->gd, NR_SECTORS);

	add_disk(dev->gd);

	return 0;

out_alloc_disk:
	blk_cleanup_queue(dev->queue);
	dev->queue = NULL;
out_blk_init:
	blk_mq_free_tag_set(&dev->tag_set);
out_alloc_tag_set:
	vfree(dev->data);
	dev->data = NULL;
out_vmalloc:
	return err;
}

/*
 * Module entry point: registers the major number and creates the RAM disk.
 *
 * Returns 0 on success or a negative error code; when the disk cannot be
 * created the major number is unregistered again before returning.
 */
static int __init my_block_init(void)
{
	int err;

	/* TODO 1: register block device */
	err = register_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
	if (err < 0) {
		printk(KERN_ERR "unable to register mybdev block device\n");
		return err;
	}

	/* TODO 2: create block device using create_block_device */
	err = create_block_device(&g_dev);
	if (err < 0)
		goto out;	/* the original ignored this failure */

	return 0;

out:
	/* TODO 2: unregister block device in case of an error */
	unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
	return err;
}

static void delete_block_device(struct my_block_dev *dev)
{
if (dev->gd) {
del_gendisk(dev->gd);
put_disk(dev->gd);
}

if (dev->queue)
blk_cleanup_queue(dev->queue);
if (dev->tag_set.tags)
blk_mq_free_tag_set(&dev->tag_set);
if (dev->data)
vfree(dev->data);
}

/*
 * Module exit point: destroys the RAM disk first, then gives back the
 * major number.
 */
static void __exit my_block_exit(void)
{
	/* TODO 2: cleanup block device using delete_block_device */
	delete_block_device(&g_dev);

	/* TODO 1: unregister block device */
	unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
}

/* Declare my_block_init/my_block_exit as the module's init and exit functions. */
module_init(my_block_init);
module_exit(my_block_exit);
  • 在提交块 IO 请求时,需要附带3个关键结构体:
    • blk_mq_tag_set 类型的“标记”
    • request_queue 类型的请求队列
    • gendisk 类型的磁盘相关信息
  • 结果:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
root@qemux86:~/skels/block_device_drivers/1-2-3-6-ram-disk/user# ./ram-disk-test

insmod ../kernel/ram-disk.ko
mknod /dev/myblock b 240 0
mknod: /dev/myblock: File exists
request received: pos=0 bytes=4096 cur_bytes=4096 dir=R
request received: pos=0 bytes=4096 cur_bytes=4096 dir=W
test sector 0 ... passed
request received: pos=0 bytes=4096 cur_bytes=4096 dir=W
test sector 1 ... passed
request received: pos=0 bytes=4096 cur_bytes=4096 dir=W
test sector 2 ... passed
request received: pos=0 bytes=4096 cur_bytes=4096 dir=W
test sector 3 ... passed
request received: pos=0 bytes=4096 cur_bytes=4096 dir=W
test sector 4 ... passed
request received: pos=0 bytes=4096 cur_bytes=4096 dir=W
test sector 5 ... passed
request received: pos=0 bytes=4096 cur_bytes=4096 dir=W
test sector 6 ... passed
request received: pos=0 bytes=4096 cur_bytes=4096 dir=W

Test2 完整代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
/*
* SO2 Lab - Block device drivers (#7)
* Linux - Exercise #4, #5 (Relay disk - bio)
*/

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>

MODULE_AUTHOR("SO2");
MODULE_DESCRIPTION("Relay disk");
MODULE_LICENSE("GPL");

#define KERN_LOG_LEVEL KERN_ALERT

#define PHYSICAL_DISK_NAME "/dev/vdb"
#define KERNEL_SECTOR_SIZE 512

#define BIO_WRITE_MESSAGE "def"

/* pointer to physical device structure */
static struct block_device *phys_bdev;

static void send_test_bio(struct block_device *bdev, int dir)
{
struct bio *bio = bio_alloc(GFP_NOIO, 1);
struct page *page;
char *buf;

/* TODO 4: fill bio (bdev, sector, direction) */
bio->bi_disk = bdev->bd_disk;
bio->bi_iter.bi_sector = 0;
bio->bi_opf = dir;
page = alloc_page(GFP_NOIO);
bio_add_page(bio, page, KERNEL_SECTOR_SIZE, 0);

/* TODO 5: write message to bio buffer if direction is write */
if (dir == REQ_OP_WRITE) {
buf = kmap_atomic(page);
memcpy(buf, BIO_WRITE_MESSAGE, strlen(BIO_WRITE_MESSAGE));
kunmap_atomic(buf);
}
/* TODO 4: submit bio and wait for completion */
printk(KERN_LOG_LEVEL "[send_test_bio] Submiting bio\n");
submit_bio_wait(bio);
printk(KERN_LOG_LEVEL "[send_test_bio] Done bio\n");
/* TODO 4: read data (first 3 bytes) from bio buffer and print it */
buf = kmap_atomic(page);
printk(KERN_LOG_LEVEL "read %02x %02x %02x\n", buf[0], buf[1], buf[2]);
kunmap_atomic(buf);

bio_put(bio);
__free_page(page);
}

static struct block_device *open_disk(char *name)
{
struct block_device *bdev;

/* TODO 4: get block device in exclusive mode */
bdev = blkdev_get_by_path(name,FMODE_READ | FMODE_WRITE | FMODE_EXCL,THIS_MODULE);
return bdev;
}

/*
 * Module entry point: opens PHYSICAL_DISK_NAME and sends a read test bio
 * to it.
 *
 * Returns 0 on success, or -EINVAL when open_disk() hands back NULL.
 */
static int __init relay_init(void)
{
	phys_bdev = open_disk(PHYSICAL_DISK_NAME);

	if (phys_bdev != NULL) {
		send_test_bio(phys_bdev, REQ_OP_READ);
		return 0;
	}

	printk(KERN_ERR "[relay_init] No such device\n");
	return -EINVAL;
}

static void close_disk(struct block_device *bdev)
{
/* TODO 4: put block device */
blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
}

/*
 * Module exit point: sends a write test bio to the physical disk and then
 * closes it.
 */
static void __exit relay_exit(void)
{
	/* TODO 5: send test write bio */
	send_test_bio(phys_bdev, REQ_OP_WRITE);

	close_disk(phys_bdev);
}

/* Declare relay_init/relay_exit as the module's init and exit functions. */
module_init(relay_init);
module_exit(relay_exit);
  • PS:对结构体 bio 的操作必须包裹在 kmap_atomic 和 kunmap_atomic 之间
  • 结果:
1
2
3
4
5
6
7
8
root@qemux86:~/skels/block_device_drivers/4-5-relay-disk# insmod relay-disk.ko 
[send_test_bio] Submiting bio
[send_test_bio] Done bio
read 64 65 66
root@qemux86:~/skels/block_device_drivers/4-5-relay-disk# rmmod relay-disk.ko
[send_test_bio] Submiting bio
[send_test_bio] Done bio
read 64 65 66