Linux kernel 初探

TOC

  1. 1. 编译内核
  2. 2. 添加自定义syscall
  3. 3. 编译busybox
  4. 4. qemu
  5. 5. 驱动
    1. 5.1. register_chrdev
    2. 5.2. memory.c
  6. 6. 调试
    1. 6.1. vmmap.c

在了解内核之前,先要会编译、调试内核。

编译内核

安装依赖:

sudo apt-get update
sudo apt-get install git fakeroot build-essential ncurses-dev xz-utils libssl-dev bc

下载kernel源代码:https://www.kernel.org/

解压后进入目录,执行下面命令进行配置:

make menuconfig

配置的时候基本什么都不需要改动,直接Save,然后Exit

然后运行下面的命令进行内核编译,该过程会花费较长时间。

make bzImage

编译好之后,在./arch/x86/boot/拿到bzImage,从源码根目录拿到vmlinux

Setup is 17628 bytes (padded to 17920 bytes).
System is 8485 kB
CRC 7bdf0988
Kernel: arch/x86/boot/bzImage is ready (#1)

添加自定义syscall

在源码根目录创建一个新的目录(模块),以经典的helloworld为例。

ex@Ex:~/test/temp/linux-5.1.7$ cd helloworld/
ex@Ex:~/test/temp/linux-5.1.7/helloworld$ tree
.
├── helloworld.c
└── Makefile

0 directories, 2 files
ex@Ex:~/test/temp/linux-5.1.7/helloworld$ cat helloworld.c
#include <linux/kernel.h>

/*
 * Demonstration system call: takes no arguments, writes a fixed greeting
 * to the kernel log and always returns 0.
 */
asmlinkage long sys_helloworld(void)
{
	printk("hello world\n");

	return 0;
}
ex@Ex:~/test/temp/linux-5.1.7/helloworld$ cat Makefile
obj-y=helloworld.o

编辑源码根目录下的Makefile,加入helloworld模块。

...
PHONY += prepare0

ifeq ($(KBUILD_EXTMOD),)
core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ helloworld/

vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
$(core-y) $(core-m) $(drivers-y) $(drivers-m) \
$(net-y) $(net-m) $(libs-y) $(libs-m) $(virt-y)))

vmlinux-alldirs := $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \
$(init-) $(core-) $(drivers-) $(net-) $(libs-) $(virt-))))
...

然后编辑include/linux/syscalls.h,添加helloworld函数原型。

asmlinkage long sys_helloworld(void);

增加在文件末尾即可。

然后再修改arch/x86/entry/syscalls/syscall_32.tbl和arch/x86/entry/syscalls/syscall_64.tbl,添加自定义的系统调用号。

i386

1000    i386    helloworld  sys_helloworld

amd64

1000    common helloworld sys_helloworld

最后再编译生成新的内核即可。

编译busybox

先到官网上下载源码:https://busybox.net/

下载完成后解压进入源码根目录输入make menuconfig进行配置。

最好在配置时进入Settings,勾上Build static binary (no shared libs),这样就不会依赖libc文件。

ex@Ex:~/test/temp/busybox-1.31.0/_install$ ldd bin/busybox 
not a dynamic executable

如果不勾选的话,需要自行配置libc库,这样步骤会很繁琐。

然后输入make install -j4进行编译,busybox编译要比kernel快很多。

编译完成后会生成一个_install的目录,这就是我们需要的环境。

先进行一些简单的初始化:

cd _install
mkdir proc
mkdir sys
mkdir lib64
mkdir -p lib/x86_64-linux-gnu/
mkdir etc
mkdir home
echo "root:x:0:0:root:/root:/bin/sh" > etc/passwd
echo "root:x:0:" > etc/group
touch etc/shadow
touch etc/gshadow
touch init
chmod +x init

然后把libc和ld准备好,否则程序需要静态编译才能运行,这会使生成的程序在调试时不太方便。

在生成的init初始化脚本中,加入如下内容:

#!/bin/sh
# Init script of the busybox initramfs: mounts the pseudo filesystems,
# populates /dev, then starts an interactive shell. When that shell exits
# the machine is powered off.
echo "{==DBG==} INIT SCRIPT"
# Create the /tmp mount point before tmpfs is mounted on it below.
mkdir /tmp
mount -t proc none /proc
mount -t sysfs none /sys
mount -t debugfs none /sys/kernel/debug
mount -t tmpfs none /tmp
mount -t devtmpfs devtmpfs /dev

# insmod /xxx.ko # load ko
mdev -s # We need this to find /dev/sda later
# /proc/uptime's first field is the number of seconds since boot.
echo -e "{==DBG==} Boot took $(cut -d' ' -f1 /proc/uptime) seconds"

# Start the shell as uid/gid 1000; swap with the line below for a root shell.
setsid /bin/cttyhack setuidgid 1000 /bin/sh #normal user
# exec /bin/sh #root

# Reached once the shell above returns: power off immediately (no delay, forced).
poweroff -d 0 -f

然后在_install目录里运行下面的命令进行打包:

find . | cpio -o --format=newc > ../rootfs.img

qemu

通过上面两步,我们得到了含有helloworld syscall的kernel bzImage和用busybox打包的fs(附带了ld和libc)。

接下来只要用qemu启动就ok了。

在这之前,可以先写一个测试程序来测试我们写的syscall。

// compiled: gcc helloworld.c -o helloworld
#include <stdio.h>
#include <unistd.h>

/*
 * Invoke the custom helloworld system call (number 1000) between two
 * marker lines printed on stdout.
 *
 * Returns 0 on success. If syscall() fails (it returns -1 and sets errno,
 * e.g. when the running kernel does not provide the call) the error is
 * reported on stderr and the exit status is 1, instead of silently
 * printing "end" as if the call had worked.
 */
int main(void)
{
    puts("start");

    if (syscall(1000) == -1) {
        perror("syscall(1000)");
        return 1;
    }

    puts("end");
    return 0;
}

将生成可执行二进制文件helloworld放在_install目录下,重新进行打包。

然后再用qemu启动:

qemu-system-x86_64 -cpu kvm64,+smep -kernel ./bzImage -initrd rootfs.img -nographic -append "console=ttyS0"

运行实例:

/ $ id
uid=1000 gid=1000 groups=1000
/ $ ./helloworld
start
[ 29.085005] hello world
end
/ $

驱动

register_chrdev

int register_chrdev (unsigned int major, const  char *name, struct file_operations*fops); 

在这里,我们指定要注册的设备的名称和主设备号,之后该设备将与file_operations结构关联起来。 如果我们为major参数指定零,该函数将自行分配一个主设备号,并把它作为返回值返回;如果指定了非零的major,则返回零表示成功。 无论哪种情况,负数都表示错误。 两个设备编号均在0-255范围内指定。

我们将设备名称作为name参数的字符串值传递(如果模块注册单个设备,则此字符串也可以传递模块的名称)。 然后,我们使用此字符串来标识/proc/devices文件中的设备。 读取,写入和保存等设备文件操作由存储在file_operations结构中的函数指针处理。 这些函数由模块实现,并且指向标识该模块的module结构的指针也存储在file_operations结构中。

来自源码:linux-5.2.7/include/linux/fs.h:1791

/*
 * Table of function pointers that handle the operations (read, write, ...)
 * performed on a device file; quoted from include/linux/fs.h.
 * A driver fills in only the callbacks it implements and may leave the
 * others zero/NULL.
 */
struct file_operations {
/* Module that implements the callbacks below. */
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iopoll)(struct kiocb *kiocb, bool spin);
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
void (*show_fdinfo)(struct seq_file *m, struct file *f);
/* Only present when the kernel is configured without CONFIG_MMU. */
#ifndef CONFIG_MMU
unsigned (*mmap_capabilities)(struct file *);
#endif
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
loff_t, size_t, unsigned int);
loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;

如果file_operations结构包含一些不需要的函数,您仍然可以使用该文件而不实现它们。 指向未实现函数的指针可以简单地设置为零。 之后,系统将负责该功能的实现并使其正常运行。

字符设备模块使用insmod加载,加载完毕后需要在/dev目录下使用mknod命令建立相应的文件结点。

编写驱动程序:

memory.c

#include <linux/init.h>

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/proc_fs.h>
#include <linux/fcntl.h>

#include <linux/uaccess.h>

MODULE_LICENSE("Dual BSD/GPL");

int memory_open(struct inode *inode, struct file *filp);
int memory_release(struct inode *inode, struct file *filp);
ssize_t memory_read(struct file *filp, char *buf, size_t count, loff_t *f_pos);
ssize_t memory_write(struct file *filp, const char *buf, size_t count, loff_t *f_pos);
void memory_exit(void);
int memory_init(void);

struct file_operations memory_fops = {
read : memory_read,
write : memory_write,
open : memory_open,
release : memory_release
};

/* Register the load and unload entry points of the module. */
module_init(memory_init);
module_exit(memory_exit);

/* Major device number passed to register_chrdev()/unregister_chrdev(). */
int memory_major = 60;

/* Backing store of the device; allocated with LENGTH bytes in memory_init(). */
char *memory_buffer;

/* Byte count tracked by memory_read()/memory_write(): grows on write, shrinks on read. */
int used = 0;

/* Capacity of memory_buffer in bytes. */
#define LENGTH 0x1000

int memory_init(void)
{
int result;

result = register_chrdev(memory_major, "memory", &memory_fops);
if (result < 0)
{
printk("<1>memory: can't obtain major number %d\n", memory_major);
return result;
}

memory_buffer = kmalloc(LENGTH, GFP_KERNEL);
if (!memory_buffer)
{
result = -ENOMEM;
goto fail;
}
memset(memory_buffer, 0, LENGTH);

printk("<1>Inserting memory module\n");
return 0;

fail:
memory_exit();
return result;
}

/*
 * Module teardown: unregister the character device, release the backing
 * buffer and log the removal.
 */
void memory_exit(void)
{
	unregister_chrdev(memory_major, "memory");

	/* kfree() accepts NULL, so no guard is required. */
	kfree(memory_buffer);

	printk("<1>Removing memory module\n");
}

/* open() handler: there is no per-open state; log the call and succeed. */
int memory_open(struct inode *inode, struct file *filp)
{
	printk("<1>Open\n");

	return 0;
}

/* release() handler: nothing to free per open; log the call and succeed. */
int memory_release(struct inode *inode, struct file *filp)
{
	printk("<1>Release\n");

	return 0;
}

/*
 * read() handler: hand the oldest stored bytes to user space and remove
 * them from the store.
 *
 * @filp:  open file (unused)
 * @buf:   user-space destination buffer
 * @count: maximum number of bytes the caller wants
 * @f_pos: file position (unused)
 *
 * Returns the number of bytes delivered, 0 when the store is empty (the
 * original returned an uninitialized value in that case), or -EFAULT when
 * the user buffer cannot be written.
 */
ssize_t memory_read(struct file *filp, char *buf,
		    size_t count, loff_t *f_pos)
{
	size_t bytes;

	if (used <= 0 || count == 0)
		return 0;

	bytes = min_t(size_t, count, used);

	/* copy_to_user() returns the number of bytes it could NOT copy. */
	if (copy_to_user(buf, memory_buffer, bytes))
		return -EFAULT;

	/*
	 * Drop the delivered bytes and slide the remainder to the front so
	 * that the next read continues where this one stopped.
	 */
	used -= bytes;
	memmove(memory_buffer, memory_buffer + bytes, used);

	return bytes;
}

/*
 * write() handler: append bytes from user space to the store.
 *
 * @filp:  open file (unused)
 * @buf:   user-space source buffer
 * @count: number of bytes the caller offers
 * @f_pos: file position (unused)
 *
 * Returns the number of bytes stored, which is less than @count when the
 * remaining space is smaller; -ENOSPC when the store is already full
 * (returning 0 would make callers retry forever); -EFAULT when the user
 * buffer cannot be read.
 */
ssize_t memory_write(struct file *filp, const char *buf,
		     size_t count, loff_t *f_pos)
{
	size_t space;
	size_t bytes;

	if (count == 0)
		return 0;

	if (used < 0 || used >= LENGTH)
		return -ENOSPC;

	/*
	 * Clamp against the free space rather than testing "used + count",
	 * which can wrap around for a huge count.
	 */
	space = LENGTH - used;
	bytes = min_t(size_t, count, space);

	/* Append after the existing data instead of overwriting it. */
	if (copy_from_user(memory_buffer + used, buf, bytes))
		return -EFAULT;

	used += bytes;

	return bytes;
}

上面的驱动可以看成一个简单的字符仓库,如果放满了字符就放不进去,如果是空的也拿不出来。

驱动源码并不能用gcc直接进行编译,需要生成一个Makefile来进行编译。

# Out-of-tree build of the "memorys" module (memorys.ko) from memory.c.
TARGET_MODULE := memorys
PWD := $(shell pwd)
# Kernel tree to build against: the module must be built with the tree of
# the kernel that will load it. Uncomment the next line to use the running
# kernel's build tree instead.
# KERNELDIR := /lib/modules/$(shell uname -r)/build
KERNELDIR := ./linux-4.15

$(TARGET_MODULE)-objs := memory.o
obj-m := $(TARGET_MODULE).o

# Neither target produces a file of its own name.
.PHONY: all clean

all:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules

clean:
	rm -rf *.o *~ core .depend .*.cmd *.ko *.mod.c .tmp_versions *.order *.symvers

驱动必须针对将要载入它的那个内核(同一版本的源码和配置)进行编译,否则载入会失败。

编译好会生成一个memorys.ko的驱动。

这时我们可以把驱动复制到_install根目录,然后在我们的init脚本中加入下面两条命令,重新生成镜像。

insmod /memorys.ko
mknod /dev/memorys c 60 0

60 为我们设置的主设备号

运行实例:

/ # ls
bin lib memorys.ko sbin usr
dev lib64 proc sys
init linuxrc root tmp
/ # echo 1234567890 > /dev/memorys
[ 25.850176] <1>Open
[ 25.855288] <1>Release
/ # cat /dev/memorys
[ 31.262535] <1>Open
1234567890
[ 31.266417] <1>Release
/ #

上面的编写方式是早期驱动开发常用的。

根据新的资料,我重新编写了一个自动挂载的驱动,代码如下:

test_src.c

#include <linux/init.h>

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/proc_fs.h>
#include <linux/fcntl.h>
#include <linux/cdev.h>
#include <linux/device.h>

#include <linux/uaccess.h>

MODULE_LICENSE("Dual BSD/GPL");

int test_open(struct inode *inode, struct file *filp);
int test_release(struct inode *inode, struct file *filp);
ssize_t test_read(struct file *filp, char *buf, size_t count, loff_t *f_pos);
ssize_t test_write(struct file *filp, const char *buf, size_t count, loff_t *f_pos);
long test_ioctl (struct file *filp, unsigned int cmd, unsigned long arg);
void test_exit(void);
int test_init(void);

/*
 * File operations of the test character device; callbacks that are not
 * listed stay NULL.
 */
struct file_operations test_fops = {
	.owner          = THIS_MODULE,
	.open           = test_open,
	.release        = test_release,
	.read           = test_read,
	.write          = test_write,
	.unlocked_ioctl = test_ioctl,
};

/* Register the load and unload entry points of the module. */
module_init(test_init);
module_exit(test_exit);

/* Backing store of the device; LENGTH bytes allocated in test_init(). */
char *test_buffer;

/* Byte count tracked by test_read()/test_write(): grows on write, shrinks on read. */
int used = 0;

/* Capacity of test_buffer in bytes. */
#define LENGTH 0x1000

/*
 * Device number filled in by alloc_chrdev_region() in test_init(). Despite
 * the name it is the value handed to cdev_add()/device_create() as the
 * device number, not just a major number.
 */
dev_t test_major;
/* Character device object initialised and added in test_init(). */
struct cdev test_cdev;
/* Device class created in test_init() and passed to device_create(). */
struct class *test_class;

int test_init(void)
{
int result;

if(alloc_chrdev_region(&test_major, 0, 1, "test") >= 0)
{
cdev_init(&test_cdev, &test_fops);
test_cdev.owner = THIS_MODULE;
result = cdev_add(&test_cdev, test_major, 1);
if(result)
{
printk(KERN_ERR "cedv_add error\n");
unregister_chrdev_region(test_major, 1LL);
return -1;
}
else
{
test_class = class_create(THIS_MODULE, "test");
if(test_class)
{
result = device_create(test_class, NULL, test_major, NULL, "test");
if(result)
{
printk(KERN_INFO "Register success\n");
test_buffer = kmalloc(LENGTH, GFP_KERNEL);
return 0;
}
else
{
printk(KERN_ERR "device_create error\n");
class_destroy(test_class);
cdev_del(&test_cdev);
unregister_chrdev_region(test_major, 1LL);
return -1;
}
}
else
{
printk(KERN_ERR "class_create error\n");
cdev_del(&test_cdev);
unregister_chrdev_region(test_major, 1LL);
return -1;
}
}

}
}

/*
 * Module teardown: undo test_init() in reverse order (device node, class,
 * cdev, device number, buffer) and log the removal.
 *
 * The former extra unregister_chrdev() call is gone: the device number was
 * reserved with alloc_chrdev_region() and is fully released by
 * unregister_chrdev_region(), and test_major holds a dev_t rather than the
 * plain major number that unregister_chrdev() expects.
 */
void test_exit(void)
{
	device_destroy(test_class, test_major);
	class_destroy(test_class);
	cdev_del(&test_cdev);
	unregister_chrdev_region(test_major, 1);

	/* kfree() accepts NULL, so no guard is required. */
	kfree(test_buffer);

	printk(KERN_INFO "Removing test module\n");
}

/* Number of test_open() calls so far; incremented in test_open(). */
int open_times = 0;
/* Number of test_release() calls so far; incremented in test_release(). */
int release_times = 0;

/* ioctl() handler stub: every command is accepted and ignored; returns 0. */
long test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	return 0;
}

/*
 * open() handler: log how many opens happened before this one, then count
 * this one. Always succeeds.
 *
 * The message previously printed release_times, so the "Open" counter in
 * the log actually tracked releases.
 */
int test_open(struct inode *inode, struct file *filp)
{
	printk(KERN_INFO "Open %d times\n", open_times);
	open_times++;
	return 0;
}

/*
 * release() handler: log how many releases happened before this one, then
 * count this one. Always succeeds.
 */
int test_release(struct inode *inode, struct file *filp)
{
	printk(KERN_INFO "Release %d times\n", release_times);
	release_times += 1;

	return 0;
}

/*
 * read() handler: hand the oldest stored bytes to user space and remove
 * them from the store.
 *
 * @filp:  open file (unused)
 * @buf:   user-space destination buffer
 * @count: maximum number of bytes the caller wants
 * @f_pos: file position (unused)
 *
 * Returns the number of bytes delivered, 0 when the store is empty (the
 * original returned an uninitialized value in that case), or -EFAULT when
 * the user buffer cannot be written.
 */
ssize_t test_read(struct file *filp, char *buf,
		  size_t count, loff_t *f_pos)
{
	size_t bytes;

	if (used <= 0 || count == 0)
		return 0;

	bytes = min_t(size_t, count, used);

	/* copy_to_user() returns the number of bytes it could NOT copy. */
	if (copy_to_user(buf, test_buffer, bytes))
		return -EFAULT;

	/*
	 * Drop the delivered bytes and slide the remainder to the front so
	 * that the next read continues where this one stopped.
	 */
	used -= bytes;
	memmove(test_buffer, test_buffer + bytes, used);

	return bytes;
}

/*
 * write() handler: append bytes from user space to the store.
 *
 * @filp:  open file (unused)
 * @buf:   user-space source buffer
 * @count: number of bytes the caller offers
 * @f_pos: file position (unused)
 *
 * Returns the number of bytes stored, which is less than @count when the
 * remaining space is smaller; -ENOSPC when the store is already full
 * (returning 0 would make callers retry forever); -EFAULT when the user
 * buffer cannot be read.
 */
ssize_t test_write(struct file *filp, const char *buf,
		   size_t count, loff_t *f_pos)
{
	size_t space;
	size_t bytes;

	if (count == 0)
		return 0;

	if (used < 0 || used >= LENGTH)
		return -ENOSPC;

	/*
	 * Clamp against the free space rather than testing "used + count",
	 * which can wrap around for a huge count.
	 */
	space = LENGTH - used;
	bytes = min_t(size_t, count, space);

	/* Append after the existing data instead of overwriting it. */
	if (copy_from_user(test_buffer + used, buf, bytes))
		return -EFAULT;

	used += bytes;

	return bytes;
}

其对应的Makefile如下:

# Out-of-tree build of the "test" module (test.ko) from test_src.c.
TARGET_MODULE := test
PWD := $(shell pwd)
# Kernel tree to build against: the module must be built with the tree of
# the kernel that will load it. Uncomment the next line to use the running
# kernel's build tree instead.
# KERNELDIR := /lib/modules/$(shell uname -r)/build
KERNELDIR := ./linux-5.2.7

$(TARGET_MODULE)-objs := test_src.o
obj-m := $(TARGET_MODULE).o

# Neither target produces a file of its own name.
.PHONY: all clean

all:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules

clean:
	rm -rf *.o *~ core .depend .*.cmd *.ko *.mod.c .tmp_versions *.order *.symvers

调试

一般来说,加上nokaslr参数把kaslr关了,调试起来会方便一些。否则gdb将找不到ELF基地址(毕竟不是本地)。

-append "console=ttyS0 nokaslr"

但是调试驱动时,即使关闭了kaslr,gdb也无法确定其基地址,这时候我们需要用add-symbol-file来手动添加基地址。

下面我写了一个程序方便快速读取驱动的基地址信息:

vmmap.c

// musl-gcc -static vmmap.c -O3 -s -o vmmap
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <dirent.h>

/*
 * Cut the string at its first newline, if it has one. A string without a
 * newline is left untouched.
 */
inline static void truncate_string(char *str)
{
    char *newline = strchr(str, '\n');

    if (newline != NULL)
    {
        *newline = '\0';
    }
}

/*
 * Read the first line of the file at path into buf (capacity cap bytes),
 * with the trailing newline removed. An empty file yields an empty string
 * instead of leaving buf with stale contents.
 *
 * Returns 0 on success, or -1 if the file cannot be opened (errno is left
 * as set by fopen so the caller can report it).
 */
static int read_first_line(const char *path, char *buf, size_t cap)
{
    FILE *fp = fopen(path, "rb");

    if (fp == NULL)
    {
        return -1;
    }

    if (fgets(buf, (int)cap, fp) == NULL)
    {
        buf[0] = '\0';
    }
    truncate_string(buf);
    fclose(fp);

    return 0;
}

/*
 * Print the arguments for gdb's add-symbol-file for one loaded module.
 *
 * argv[1] is a directory such as /sys/module/<name>/sections in which every
 * regular file is named after a section and holds that section's address.
 * Output: " <.text address> " followed by "-s <section> <address> " for
 * every other section, then a newline.
 *
 * Exits with status 1 on a usage error, if the directory cannot be entered
 * or listed, if there is no .text entry, or if a section file cannot be
 * opened.
 */
int main(int argc, char const *argv[])
{
    enum { MAX_SECTIONS = 0x100, NAME_CAP = 256 };
    /*
     * Entry names are copied out of the dirent: the pointer returned by
     * readdir() may be overwritten by the next readdir() call on the same
     * stream, so it must not be kept.
     */
    static char names[MAX_SECTIONS][NAME_CAP];
    DIR *dir;
    struct dirent *entry;
    int i, num = 0, text_position = -1;
    char buf[0x400];

    setvbuf(stdin, NULL, _IONBF, 0);
    setvbuf(stdout, NULL, _IONBF, 0);
    setvbuf(stderr, NULL, _IONBF, 0);

    if(argc < 2)
    {
        fprintf(stderr, "Usage: ./vmmap file-path\n");
        exit(1);
    }

    if(chdir(argv[1]) == -1)
    {
        fprintf(stderr, "chdir error: %m\n");
        exit(1);
    }

    /*
     * The working directory is now the target, so list "."; opening
     * argv[1] again would fail for a relative path.
     */
    dir = opendir(".");
    if(dir == NULL)
    {
        fprintf(stderr, "opendir error: %m\n");
        exit(1);
    }

    while(num < MAX_SECTIONS && (entry = readdir(dir)) != NULL)
    {
        if(entry->d_type != DT_REG)
        {
            continue;
        }

        snprintf(names[num], NAME_CAP, "%s", entry->d_name);
        if(strcmp(".text", names[num]) == 0)
        {
            text_position = num;
        }
        num++;
    }
    closedir(dir);

    if(text_position == -1)
    {
        fprintf(stderr, "Error: don't find .text\n");
        exit(1);
    }

    /* The .text address goes first, without a "-s" prefix. */
    if(read_first_line(names[text_position], buf, sizeof(buf)) == -1)
    {
        fprintf(stderr, "fopen error: %m\n");
        exit(1);
    }
    printf(" %s ", buf);

    for(i = 0; i < num; i++)
    {
        if(i == text_position)
        {
            continue;
        }

        if(read_first_line(names[i], buf, sizeof(buf)) == -1)
        {
            fprintf(stderr, "fopen error: %m\n");
            exit(1);
        }
        printf("-s %s %s ", names[i], buf);
    }
    puts("");

    return 0;
}

使用方法如下:

/ # /vmmap /sys/module/test/sections
0xffffffffc00fd000 -s .note.Linux 0xffffffffc00fe138 -s .strtab 0xffffffffc01026a8 -s __mcount_loc 0xffffffffc00fe024 -s .bss 0xffffffffc00ff480 -s .gnu.linkonce.this_module 0xffffffffc00ff140 -s .symtab 0xffffffffc0102000 -s .note.gnu.build-id 0xffffffffc00fe000 -s .data 0xffffffffc00ff000 -s __bug_table 0xffffffffc00ff100 -s .rodata.str1.1 0xffffffffc00fe05c -s .rodata.str1.8 0xffffffffc00fe110

可以写一个脚本来快速连接(qemu启动时需要加上-gdb tcp::1000参数,这样gdb才能通过localhost:1000连接):

#!/bin/sh
gdb -q \
-ex "file ./vmlinux" \
-ex "add-symbol-file ./test.ko 0x.... -s .bss 0x...." \
-ex "target remote localhost:1000"

那么结合上面写的vmmap其脚本就是下面这个样子:

#!/bin/sh
gdb -q \
-ex "file ./vmlinux" \
-ex "add-symbol-file ./test.ko 0xffffffffc00fd000 -s .note.Linux 0xffffffffc00fe138 -s .strtab 0xffffffffc01026a8 -s __mcount_loc 0xffffffffc00fe024 -s .bss 0xffffffffc00ff480 -s .gnu.linkonce.this_module 0xffffffffc00ff140 -s .symtab 0xffffffffc0102000 -s .note.gnu.build-id 0xffffffffc00fe000 -s .data 0xffffffffc00ff000 -s __bug_table 0xffffffffc00ff100 -s .rodata.str1.1 0xffffffffc00fe05c -s .rodata.str1.8 0xffffffffc00fe110" \
-ex "target remote localhost:1000"

资料来源:

  1. 教你写Linux设备驱动程序:一个简短的教程
  2. Linux驱动程序教程:如何编写简单的Linux设备驱动程序
  3. kernel环境配置