0%

IO_FILE源码分析:fread

IO_FILE源码分析:fread

当程序从键盘读入数据时,程序会先把数据存储到“输入缓冲区”中

直接上源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* Read up to COUNT items of SIZE bytes each from FP into BUF.
   Returns 0 at once when size * count is 0.  Otherwise returns COUNT if
   every requested byte was read, else the number of whole items read
   (bytes_read / size).  */
_IO_size_t
_IO_fread (buf, size, count, fp)
void *buf;
_IO_size_t size;
_IO_size_t count;
_IO_FILE *fp;
{
_IO_size_t bytes_requested = size * count;
_IO_size_t bytes_read;
CHECK_FILE (fp, 0); /* Basic validity check on fp.  */
if (bytes_requested == 0)
return 0;
/* The read is bracketed by _IO_flockfile/_IO_funlockfile.  The unlock
   routine is also handed to _IO_cleanup_region_start -- presumably so it
   runs if the thread is cancelled mid-read; confirm against libioP.h.  */
_IO_cleanup_region_start ((void (*) __P ((void *))) _IO_funlockfile, fp);
_IO_flockfile (fp);
bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested); /* The actual read happens here.  */
_IO_funlockfile (fp);
_IO_cleanup_region_end (0);
return bytes_requested == bytes_read ? count : bytes_read / size;
}

_IO_fread 函数的功能主要由 _IO_sgetn 函数实现,其他函数都是辅助功能:

1
2
3
4
5
6
7
8
9
/* Read up to N bytes from FP into DATA.  This is a thin forwarder: the
   request and the return value go straight through _IO_XSGETN.  */
_IO_size_t
_IO_sgetn (fp, data, n)
_IO_FILE *fp;
void *data;
_IO_size_t n;
{
/* FIXME handle putback buffer here! */
return _IO_XSGETN (fp, data, n);
}

继续跟进 _IO_XSGETN 函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/* NOTE(review): JUMP2 is not shown in this excerpt.  Per the surrounding
   article it dispatches to the stream's __xsgetn vtable slot, which is
   _IO_file_xsgetn for file streams -- confirm against libioP.h.  */
#define _IO_XSGETN(FP, DATA, N) JUMP2 (__xsgetn, FP, DATA, N)

/* Copy up to N bytes from FP into DATA and return the number of bytes
   actually delivered (n - want).

   Three phases:
     1. If the stream has no buffer yet, create one with _IO_doallocbuf.
     2. While the get area holds enough unread bytes, memcpy them out.
     3. Otherwise drain what is buffered, then either refill the buffer
	through __underflow (remaining request smaller than the buffer) or
	read straight into the caller's memory with _IO_SYSREAD.  */
_IO_size_t
_IO_file_xsgetn (fp, data, n)
_IO_FILE *fp;
void *data;
_IO_size_t n;
{
register _IO_size_t want, have;
register _IO_ssize_t count;
register char *s = data; /* Write cursor into the caller's destination buffer.  */

want = n;

if (fp->_IO_buf_base == NULL) /* No stream buffer has been set up yet.  */
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp); /* Create the stream buffer.  */
}

while (want > 0) /* want = bytes still owed to the caller.  */
{
have = fp->_IO_read_end - fp->_IO_read_ptr; /* Unread bytes currently in the get area (not the buffer capacity).  */
if (want <= have) /* Buffered data covers the rest of the request.  */
{
memcpy (s, fp->_IO_read_ptr, want); /* Copy straight from the get area to the caller.  */
fp->_IO_read_ptr += want;
want = 0;
}
else /* Not enough buffered data.  */
{
if (have > 0) /* Hand over whatever is buffered first.  */
{
#ifdef _LIBC
s = __mempcpy (s, fp->_IO_read_ptr, have);
#else
memcpy (s, fp->_IO_read_ptr, have);
s += have;
#endif
want -= have;
fp->_IO_read_ptr += have;
}

/* Check for backup and repeat */
if (_IO_in_backup (fp)) /* Reading from the backup area: switch to the main get area and retry.  */
{
_IO_switch_to_main_get_area (fp);
continue;
}

/* Remaining request is smaller than the stream buffer: refill the
   buffer via __underflow (which, per the article, ends in a read
   system call) and loop so the copy branch above can serve it.  */
if (fp->_IO_buf_base && want < fp->_IO_buf_end - fp->_IO_buf_base)
{
if (__underflow (fp) == EOF)
/* __underflow returned EOF: stop and return what we have.  */
break;

continue;
}

_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
_IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);
/* Both calls receive only _IO_buf_base, i.e. the get and put pointers
   are reset to the buffer start before bypassing the buffer
   (assuming the usual _IO_setg/_IO_setp semantics -- confirm).  */

/* Large request: read directly into the caller's memory.  When the
   stream buffer is at least 128 bytes, trim the request down to a
   whole multiple of the buffer size.  */
count = want;
if (fp->_IO_buf_base)
{
_IO_size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
if (block_size >= 128)
count -= want % block_size;
}

count = _IO_SYSREAD (fp, s, count);
if (count <= 0)
{
/* 0 is recorded as end-of-file, a negative result as an error.  */
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN;

break;
}

s += count;
want -= count;
/* Advance the cached offset only when it is known.  */
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
}
}

return n - want;
}

_IO_file_xsgetn 是处理 fread 读入数据的核心函数,分为三个部分:

  • 第一部分是 fp->_IO_buf_base 为空的情况,表明此时的FILE结构体中的指针未被初始化,输入缓冲区未建立,则调用 _IO_doallocbuf 去初始化指针,建立输入缓冲区
  • 第二部分是输入缓冲区里有输入并且够用,此时将缓冲区里的数据直接拷贝至目标buff
  • 第三部分是输入缓冲区里的数据为空或者是不能满足全部的需求:先把缓冲区中已有的数据拷贝给用户;若剩余需求小于缓冲区大小,则调用 __underflow 通过系统调用把数据读入缓冲区,然后再把数据从缓冲区中复制给用户;否则直接调用 _IO_SYSREAD 把数据读入目标缓冲区

建立输入缓冲区

如果输入缓冲区未建立,那么程序会调用 _IO_doallocbuf 建立输入缓冲区:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/* Make sure FP has a buffer.
   - Does nothing if _IO_buf_base is already set.
   - Otherwise, unless the stream is unbuffered with _mode <= 0, asks
     _IO_DOALLOCATE for a buffer and returns if that did not yield EOF.
   - In every remaining case (unbuffered stream, or allocation returned
     EOF) installs the one-byte _shortbuf embedded in FP.  */
void
_IO_doallocbuf (fp)
_IO_FILE *fp;
{
if (fp->_IO_buf_base) /* A buffer already exists: nothing to do.  */
return;
if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0)
/* Taken when the stream is NOT flagged _IO_UNBUFFERED, or when _mode > 0.  */
if (_IO_DOALLOCATE (fp) != EOF) /* vtable call; _IO_file_doallocate for file streams per the article.  */
return;
_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
/* Fallback: use the 1-byte _shortbuf as [_IO_buf_base, _IO_buf_end).  */
/* This is the regular path for unbuffered streams, and also the recovery
   path when _IO_DOALLOCATE returned EOF and so installed nothing.  */
/* The final 0 makes _IO_setb set _IO_USER_BUF, so this buffer is never
   passed to FREE_BUF by a later _IO_setb.  */
}

如果条件满足,就会调用 vtable 中的 _IO_file_doallocate(建立输入缓冲区和主体):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/* Map ROUND_TO_PAGE (_S) bytes of private, anonymous, read/write memory
   and store the address in _B.  On MAP_FAILED the macro executes
   `return (_R)' -- it returns from the ENCLOSING function, so it can only
   be used where _R is a valid return value.  */
# define ALLOC_BUF(_B, _S, _R) \
do { \
(_B) = (char *) mmap (0, ROUND_TO_PAGE (_S), \
PROT_READ | PROT_WRITE, \
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
if ((_B) == (char *) MAP_FAILED) \
return (_R); \
} while (0)

/* Allocate and install a buffer for FP.
   Size: _IO_BUFSIZ when FP has no descriptor or _IO_SYSSTAT fails;
   otherwise st_blksize when it is positive and _IO_HAVE_ST_BLKSIZE is set,
   else _IO_BUFSIZ.
   Returns 1 on success, or EOF when ALLOC_BUF cannot obtain memory (the
   macro returns from this function itself).  */
int
_IO_file_doallocate (fp)
_IO_FILE *fp;
{
_IO_size_t size;
int couldbetty;
char *p;
struct _G_stat64 st;

#ifndef _LIBC
if (_IO_cleanup_registration_needed)
(*_IO_cleanup_registration_needed) ();
#endif

if (fp->_fileno < 0 || _IO_SYSSTAT (fp, &st) < 0)
/* No descriptor, or the stat call (vtable __stat per the article) failed.  */
{
couldbetty = 0;
size = _IO_BUFSIZ;
#if 0
/* do not try to optimise fseek() */
fp->_flags |= __SNPT;
#endif
}
else
{
/* Only a character device can be a terminal.  */
couldbetty = S_ISCHR (st.st_mode);
#if _IO_HAVE_ST_BLKSIZE
size = st.st_blksize <= 0 ? _IO_BUFSIZ : st.st_blksize;
#else
size = _IO_BUFSIZ;
#endif
}
ALLOC_BUF (p, size, EOF); /* Obtain the memory; returns EOF from here on failure.  */
_IO_setb (fp, p, p + size, 1); /* a = 1 clears _IO_USER_BUF for the new buffer.  */
/* A terminal gets line buffering.  */
if (couldbetty && isatty (fp->_fileno))
fp->_flags |= _IO_LINE_BUF;
return 1;
}

获取文件信息后,利用宏函数 ALLOC_BUF 分配内存,接着调用 _IO_setb :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* Install [B, EB) as F's buffer.
   Any previous buffer is released with FREE_BUF first, unless F carries
   _IO_USER_BUF.  A nonzero A clears _IO_USER_BUF (so a later _IO_setb will
   free this buffer); a zero A sets it (so the buffer is left alone).  */
void
_IO_setb (f, b, eb, a)
_IO_FILE *f;
char *b;
char *eb;
int a;
{
if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
FREE_BUF (f->_IO_buf_base, _IO_blen (f));
f->_IO_buf_base = b;
f->_IO_buf_end = eb;
if (a) /* Record whether a later _IO_setb may free this buffer.  */
f->_flags &= ~_IO_USER_BUF;
else
f->_flags |= _IO_USER_BUF;
}

设置了 _IO_buf_base 和 _IO_buf_end

将缓冲区里的数据直接拷贝至目标buff

1
2
3
4
5
6
7
8
9
10
 while (want > 0) /* want为需要的数据 */
{
have = fp->_IO_read_end - fp->_IO_read_ptr; /* 计算拥有的输入缓冲区大小 */
if (want <= have) /* 输入缓冲区够用 */
{
memcpy (s, fp->_IO_read_ptr, want); /* 直接把输入缓冲区拷贝给目标变量 */
fp->_IO_read_ptr += want;
want = 0;
}
........

这部分比较简单,判断输入缓冲区够用以后,直接就复制给用户缓冲区了(目标变量)

调用系统调用读入数据

如果是输入缓冲区里的数据为空或者是不能满足全部的需求,则会调用 __underflow :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* Make at least one byte available in FP's get area.
   Returns the byte at _IO_read_ptr as an unsigned char without advancing
   the pointer, EOF when one of the preliminary steps fails, or otherwise
   whatever _IO_UNDERFLOW returns.  */
int
__underflow (fp)
_IO_FILE *fp;
{
#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
/* NOTE(review): presumably rejects streams that cannot be made
   byte-oriented; _IO_fwide is not shown here -- confirm.  */
if (fp->_vtable_offset == 0 && _IO_fwide (fp, -1) != -1)
return EOF;
#endif

if (fp->_mode == 0)
_IO_fwide (fp, -1);
/* A stream in put mode must be switched to get mode first.  */
if (_IO_in_put_mode (fp))
if (_IO_switch_to_get_mode (fp) == EOF)
return EOF;
/* Data is already available: no refill needed.  */
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
/* Backup area exhausted: return to the main get area and re-check.  */
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
}
/* With markers, save_for_backup is called first and a nonzero result
   aborts with EOF; without markers, any backup area is freed.  */
if (_IO_have_markers (fp))
{
if (save_for_backup (fp, fp->_IO_read_end))
return EOF;
}
else if (_IO_have_backup (fp))
_IO_free_backup_area (fp);
return _IO_UNDERFLOW (fp); /* vtable call; _IO_new_file_underflow for file streams per the article.  */
}

前面的都是检查直接跳过,到后面调用 _IO_UNDERFLOW 才是关键(其实它就是vtable里面的 _IO_new_file_underflow):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# define _IO_new_file_underflow _IO_file_underflow

/* Refill FP's buffer with one _IO_SYSREAD of the whole buffer capacity.
   Returns the first unread byte as an unsigned char without consuming it.
   Returns EOF when the stream is flagged _IO_NO_READS (errno = EBADF,
   _IO_ERR_SEEN set) or when the read delivers nothing: a result of 0 sets
   _IO_EOF_SEEN, a negative result sets _IO_ERR_SEEN.  */
int
_IO_new_file_underflow (fp)
_IO_FILE *fp;
{
_IO_ssize_t count;
#if 0
/* SysV does not make this test; take it out for compatibility */
if (fp->_flags & _IO_EOF_SEEN) /* Disabled: would return EOF once end-of-file had been seen.  */
return (EOF);
#endif

/* Reading is refused when _flags contains _IO_NO_READS.  */
if (fp->_flags & _IO_NO_READS)
{
fp->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
/* Unread data is still buffered: return it without a system call.  */
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;

if (fp->_IO_buf_base == NULL) /* No buffer yet: create one with _IO_doallocbuf.  */
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}

/* Flush all line buffered files before reading. */
/* FIXME This can/should be moved to genops ?? */
if (fp->_flags & (_IO_LINE_BUF|_IO_UNBUFFERED))
_IO_flush_all_linebuffered ();

_IO_switch_to_get_mode (fp);

/* Reset every read and write pointer to _IO_buf_base, so both the get
   area and the put area are empty before the read.  */
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
fp->_IO_read_end = fp->_IO_buf_base;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;

count = _IO_SYSREAD (fp, fp->_IO_buf_base,
fp->_IO_buf_end - fp->_IO_buf_base);
/* _IO_SYSREAD is a vtable call (_IO_file_read per the article), which
   ends in the read system call.  */
/* It reads into _IO_buf_base and asks for the full buffer capacity.  */
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN, count = 0; /* Zero count so _IO_read_end stays put below.  */
}
fp->_IO_read_end += count; /* Extend the get area by the number of bytes just read.  */
if (count == 0)
return EOF;
/* Advance the cached offset only when it is known.  */
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
return *(unsigned char *) fp->_IO_read_ptr;
}

函数执行完后,返回到 _IO_file_xsgetn 函数中,由于 while 循环的存在,重新执行第二部分,此时将输入缓冲区拷贝至目标缓冲区,最终返回


在 IO_FILE任意读 (基于stdin的地址任意读) 漏洞中,最关键的函数应是 _IO_new_file_underflow ,它里面有个标志位的判断:

1
2
3
4
5
6
if (fp->_flags & _IO_NO_READS)
{
fp->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}

这个漏洞我还不熟,后面熟悉了再慢慢说