0%

Linux 各种内存共享

System V 共享内存

传统的 System V 共享内存是指 shm 这一组 API:

1
2
3
4
int shmget(key_t key, size_t size, int shmflg); /* 获取一个新的共享内存段 */
void *shmat(int shmid, const void *shmaddr, int shmflg); /* 进行内存映射 */
int shmdt(const void *shmaddr); /* 删除内存映射 */
int shmctl(int shmid, int cmd, struct shmid_ds *buf); /* 对共享内存段进行操作 */

在早期版本的内核中,“共享内存”,“信号量”,“消息队列” 都使用通用的 ipcget 函数完成创建,只是 ipc_ops 结构体的初始化不同

其实 do_shmat 底层申请内存的部分和 mmap 一样,都是调用 do_mmap_pgoff,效果就是映射一片由 VMA 组织起来的共享内存段

之后的 shmget 通过相同的 key,就可以获取同一片共享内存区域

POSIX 共享内存

传统的 System V shm 共享内存有个升级版的 POSIX 共享内存。注意:下面列出的是内核内部的实现函数(vm_operations 回调),用户态真正使用的 POSIX 接口是 shm_open(3) / shm_unlink(3):

1
2
3
static void shm_open(struct vm_area_struct *vma); /* 在/dev/shm/下建立一个文件,作为该进程的共享内存 */
static void shm_close(struct vm_area_struct *vma); /* 释放目标共享内存 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp); /* 销毁/dev/shm/中对应的文件 */
  • /dev/shm/ 是一个挂载为 tmpfs 文件系统的目录,可以理解为只存在于内存中的文件

还有个更经典的 POSIX API 就是 mmap:

1
void *mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset);
  • mmap 需要和磁盘进行交互(非匿名映射),导致效率没有 shm 好,但它能够存储的空间更大

memfd_create 共享内存

函数 memfd_create 可以创建一个“虚拟文件”,它映射到一片物理内存而不是磁盘

1
int memfd_create(const char *name, unsigned int flags);
  • 创建基于 tmpfs 的匿名文件(返回文件描述符)

函数 memfd_create 本身并没有共享内存的能力,但是通过之前的 FD 转移技术可以实现共享内存(把 memfd_create 生成的 FD 转移到其他进程中,就实现共享内存了)

使用案例如下:

  • 发送共享内存句柄:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/memfd.h>
#include <sys/syscall.h>

#define handle_error(msg) do { perror(msg); exit(EXIT_FAILURE); } while(0)

/*
 * send_fd() - pass `n` open file descriptors to the peer of a connected
 * UNIX-domain socket via an SCM_RIGHTS ancillary (control) message.
 *
 * @socket: connected AF_UNIX stream socket
 * @fd:     array of `n` descriptors to transfer
 * @n:      number of descriptors
 *
 * The kernel duplicates the descriptors into the receiving process, so
 * the receiver gets its own (possibly different-numbered) fds referring
 * to the same open file descriptions.
 *
 * Exits the process on sendmsg() failure.
 */
static void send_fd(int socket, int *fd, int n)
{
    struct msghdr msg = {0};
    struct cmsghdr *cmsg;
    char buf[CMSG_SPACE(n * sizeof(int))];
    /* Dummy payload: SCM_RIGHTS must accompany at least one data byte.
     * Zero-initialize it so no uninitialized stack bytes leak onto the
     * wire (the original sent 256 bytes of stack garbage). Renamed from
     * `dup`, which shadowed dup(2). */
    char dummy[256] = {0};
    struct iovec io = { .iov_base = dummy, .iov_len = sizeof(dummy) };

    memset(buf, 0, sizeof(buf));

    msg.msg_iov = &io;
    msg.msg_iovlen = 1;
    msg.msg_control = buf;
    msg.msg_controllen = sizeof(buf);

    cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;  /* fd-passing control message */
    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));

    memcpy((int *) CMSG_DATA(cmsg), fd, n * sizeof(int));

    if (sendmsg(socket, &msg, 0) < 0) {
        perror("Failed to send message");
        exit(EXIT_FAILURE);
    }
}

/*
 * Sender: creates an anonymous tmpfs-backed file with memfd_create(2),
 * passes its fd to the peer over a UNIX-domain socket, then writes a
 * random value into the shared mapping every 2 seconds.
 */
int main(int argc, char *argv[]) {
    int sfd, fd;
    struct sockaddr_un addr;
    char *buffer;

    srand((unsigned int)time(NULL));

    sfd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sfd == -1)
        handle_error ("Failed to create socket");

    memset(&addr, 0, sizeof(struct sockaddr_un));
    addr.sun_family = AF_UNIX;
    strncpy(addr.sun_path, "/tmp/fd-pass.socket", sizeof(addr.sun_path) - 1);

    /* memfd_create(2) via raw syscall (older glibc lacks a wrapper).
     * Check the fd BEFORE using it -- the original called ftruncate()
     * on a possibly-negative descriptor. */
    fd = syscall(__NR_memfd_create, "shm", MFD_CLOEXEC);
    if (fd < 0)
        handle_error ("Failed to create memfd");
    fprintf (stdout, "Opened fd %d in parent\n", fd);

    /* Size the anonymous file; its return value was previously ignored. */
    if (ftruncate(fd, 0x1000) == -1)
        handle_error ("Failed to size memfd");

    if (connect(sfd, (struct sockaddr *) &addr, sizeof(struct sockaddr_un)) == -1)
        handle_error ("Failed to connect to socket");

    send_fd (sfd, &fd, 1);

    /* mmap() reports failure with MAP_FAILED, not NULL. */
    buffer = (char *)mmap(NULL, 0x30, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0);
    if (buffer == MAP_FAILED) {
        perror("[mmap error]");
        return -1;
    }

    /* Publish a new value into the shared page every 2 seconds.
     * snprintf bounds the write to the 0x30-byte mapping. */
    while (1) {
        int t = (int)(rand() % 1000) / 100;
        snprintf(buffer, 0x30, "yhellow: %d", t);
        sleep(2);
    }

    /* not reached */
    exit(EXIT_SUCCESS);
}
  • 接收共享内存句柄:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include <fcntl.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/syscall.h>

#define handle_error(msg) do { perror(msg); exit(EXIT_FAILURE); } while(0)

/*
 * recv_fd() - receive `n` file descriptors passed over a UNIX-domain
 * socket as an SCM_RIGHTS ancillary message.
 *
 * @socket: connected AF_UNIX stream socket
 * @n:      number of descriptors expected
 *
 * Returns a malloc'd array of `n` descriptors; the caller owns (and
 * must free) it. Exits the process on allocation, recvmsg(), or
 * missing-control-message failure.
 */
static int * recv_fd(int socket, int n) {
    int *fds = malloc (n * sizeof(int));
    struct msghdr msg = {0};
    struct cmsghdr *cmsg;
    char buf[CMSG_SPACE(n * sizeof(int))];
    char dummy[256];  /* sink for the sender's placeholder payload */
    struct iovec io = { .iov_base = dummy, .iov_len = sizeof(dummy) };

    /* The original dereferenced the malloc result unchecked. */
    if (fds == NULL) {
        perror("Failed to allocate fd array");
        exit(EXIT_FAILURE);
    }

    memset(buf, 0, sizeof(buf));

    msg.msg_iov = &io;
    msg.msg_iovlen = 1;
    msg.msg_control = buf;
    msg.msg_controllen = sizeof(buf);

    if (recvmsg (socket, &msg, 0) < 0) {
        perror("Failed to receive message");
        exit(EXIT_FAILURE);
    }

    /* CMSG_FIRSTHDR may return NULL (no control data) -- the original
     * dereferenced it unconditionally. Also verify the message type. */
    cmsg = CMSG_FIRSTHDR(&msg);
    if (cmsg == NULL || cmsg->cmsg_type != SCM_RIGHTS) {
        fprintf(stderr, "No SCM_RIGHTS control message received\n");
        exit(EXIT_FAILURE);
    }

    memcpy (fds, (int *) CMSG_DATA(cmsg), n * sizeof(int));

    return fds;
}

/*
 * Receiver: accepts a connection on /tmp/fd-pass.socket, receives the
 * memfd descriptor via SCM_RIGHTS, maps it, and prints the shared
 * buffer's contents every 3 seconds.
 */
int main(int argc, char *argv[]) {
    char *buffer;
    int sfd, cfd, *fdp;
    struct sockaddr_un addr;

    sfd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sfd == -1)
        handle_error ("Failed to create socket");

    /* Remove a stale socket file from a previous run; ENOENT is fine. */
    if (unlink ("/tmp/fd-pass.socket") == -1 && errno != ENOENT)
        handle_error ("Removing socket file failed");

    memset(&addr, 0, sizeof(struct sockaddr_un));
    addr.sun_family = AF_UNIX;
    strncpy(addr.sun_path, "/tmp/fd-pass.socket", sizeof(addr.sun_path) - 1);

    if (bind(sfd, (struct sockaddr *) &addr, sizeof(struct sockaddr_un)) == -1)
        handle_error ("Failed to bind to socket");

    if (listen(sfd, 5) == -1)
        handle_error ("Failed to listen on socket");

    cfd = accept(sfd, NULL, NULL);
    if (cfd == -1)
        handle_error ("Failed to accept incoming connection");

    fdp = recv_fd (cfd, 1);

    /* Check for MAP_FAILED -- the original dereferenced the result of
     * mmap() unconditionally in the loop below. */
    buffer = (char *)mmap(NULL, 0x30, PROT_WRITE|PROT_READ, MAP_SHARED, *fdp, 0);
    if (buffer == MAP_FAILED)
        handle_error ("Failed to map passed fd");

    while (1) {
        fprintf (stdout, "Reading from passed fd %d\n", *fdp);
        printf("%s\n", buffer);
        sleep(3);
    }

    /* not reached (loop above never exits) */
    if (close(cfd) == -1)
        handle_error ("Failed to close client socket");

    return 0;
}
  • 结果:
1
2
3
4
5
6
7
8
9
10
11
12
13
exp ./send 
Opened fd 4 in parent
exp ./read
Reading from passed fd 5
yhellow: 4
Reading from passed fd 5
yhellow: 0
Reading from passed fd 5
yhellow: 5
Reading from passed fd 5
yhellow: 4
Reading from passed fd 5
yhellow: 3

参考:共享内存技术之memfd_create

dma_buf 共享内存

dma_buf 可以实现 buffer 在多个设备的共享,如果设备驱动想要共享 DMA 缓冲区,可以让一个驱动来导出,一个驱动来使用:

  • 可以把一片底层驱动 A 的 buffer 导出到用户空间成为一个 fd
  • 也可以把 fd 导入到底层驱动 B
  • 如果进行 mmap 得到虚拟地址,CPU 也是可以在用户空间访问到已经获得用户空间虚拟地址的底层 buffer 的

类似于消费者生产者模型,Linux DMA-BUF 就是基于这种方式来实现的

dma_buf 在内核中的结构体如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/* Kernel-internal shared-buffer object (illustrative snippet from the
 * Linux kernel; the referenced types are declared in kernel headers
 * not shown here). One dma_buf represents one exported buffer. */
struct dma_buf {
size_t size; /* buffer size in bytes */
struct file *file; /* backing anonymous file; its fd is what userspace holds */
struct list_head attachments; /* list of importer devices (dma_buf_attachment) */
const struct dma_buf_ops *ops; /* exporter-supplied callbacks */
struct mutex lock; /* serializes attach/detach and vmap state */
unsigned vmapping_counter; /* active kernel vmap count */
void *vmap_ptr; /* cached kernel virtual address while vmapped */
const char *exp_name; /* exporter name (for debugfs) */
struct module *owner; /* module that exported the buffer */
struct list_head list_node; /* entry in the global dma-buf list */
void *priv; /* exporter-private data */
struct reservation_object *resv; /* fence container for synchronization */

/* poll support */
wait_queue_head_t poll;

struct dma_buf_poll_cb_t {
struct dma_fence_cb cb;
wait_queue_head_t *poll;

__poll_t active; /* which poll events this callback is armed for */
} cb_excl, cb_shared; /* exclusive vs. shared fence callbacks */
};

当用户 call VIDIOC_EXPBUF 这个 IOCTL 的时候,可以把 dma_buf 转化为 fd:

1
int ioctl(int fd, VIDIOC_EXPBUF, struct v4l2_exportbuffer *argp);

想要在设备驱动中导出 dma_buf,或者在导入侧设备驱动中使用 dma_buf,则会用到如下这些 API:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* Export a buffer (exporter-side driver). The macro records __FILE__
 * as the exporter name for debugging. */
#define dma_buf_export(priv, ops, size, flags) \
dma_buf_export_named(priv, ops, size, flags, __FILE__)
struct dma_buf *dma_buf_export_named(void *priv, struct dma_buf_ops *ops,
size_t size, int flags,
const char *exp_name);
/* Obtain a file descriptor for the exported buffer (handed to userspace) */
int dma_buf_fd(struct dma_buf *dmabuf, int flags);

/* Attach to a buffer (importer-side driver): fd -> dma_buf, then
 * register the importing device */
struct dma_buf *dma_buf_get(int fd);
struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
struct device *dev);

/* Request access: map the buffer for DMA, yielding a scatter-gather table */
struct sg_table * dma_buf_map_attachment(struct dma_buf_attachment *,
enum dma_data_direction);
void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *);

/* Detach and drop the reference */
void dma_buf_detach(struct dma_buf *dmabuf,
struct dma_buf_attachment *dmabuf_attach);
void dma_buf_put(struct dma_buf *dmabuf);

可以通过如下方式获取文件描述符:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
 * buffer_export() - export one V4L2 driver buffer as a dma-buf fd.
 *
 * @v4lfd: open V4L2 device descriptor
 * @bt:    buffer type (e.g. V4L2_BUF_TYPE_VIDEO_CAPTURE)
 * @index: index of the buffer to export
 * @dmafd: out-parameter receiving the exported dma-buf descriptor
 *
 * Issues the VIDIOC_EXPBUF ioctl; returns 0 on success, -1 on failure
 * (error already reported via perror).
 */
int buffer_export(int v4lfd, enum v4l2_buf_type bt, int index, int *dmafd)
{
    struct v4l2_exportbuffer req;

    /* memset (rather than = {0}) guarantees padding bytes are zeroed
     * before handing the struct to the kernel. */
    memset(&req, 0, sizeof(req));
    req.type = bt;
    req.index = index;

    if (ioctl(v4lfd, VIDIOC_EXPBUF, &req) == -1) {
        perror("VIDIOC_EXPBUF");
        return -1;
    }

    *dmafd = req.fd;
    return 0;
}