Linux 进程管理之current

Linux 进程管理之current
文章目录
前言

由于经常在内核中看到current，便记录下current的实现原理。

一、current简介

current表示当前正在运行进程的进程描述符struct task_struct。

在当前的x86_64和ARM64架构下，current的实现方式和早期Linux版本中current的实现方式不一样。目前大多数内核教材描述的都是早期Linux版本的实现方式：通过thread_info结构中的task成员获取task_struct结构的指针。接下来我们便来讲述较新Linux版本（本文以4.10.1版本的源码为例）x86_64和ARM64架构下current的实现原理。

二、x86_64 current 的实现

 2.1 current_task 的实现

x86_64架构下每个 CPU 当前正在运行的任务（task_struct）保存在 per-cpu变量中，当前进程的变量 current_task 就被声明为 per-cpu变量，如下所示：
关于x86_64架构per-cpu变量请参考：Linux per-cpu
```
// linux-4.10.1/arch/x86/include/asm/current.h
DECLARE_PER_CPU(struct task_struct *, current_task);
1
2
```
该per-cpu变量定义在：
```
// linux-4.10.1/arch/x86/kernel/cpu/common.c

/*
 * The following percpu variables are hot.  Align current_task to
 * cacheline size such that they fall in the same cacheline.
 */
DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = &init_task;
1
2
3
4
5
6
7
```
系统刚刚初始化的时候，current_task 都指向 init_task：
```
// linux-4.10.1/init/init_task.c

/* Initial task structure */
struct task_struct init_task = INIT_TASK(init_task);
EXPORT_SYMBOL(init_task);
1
2
3
4
5
```
当某个 CPU 上的进程进行切换的时候，current_task 被修改为将要切换到的目标进程，比如：进程切换函数 __switch_to 就会改变 current_task指向将要切换的进程 struct task_struct *next_p：
```
current_task = next_p;
1
```
```
// linux-4.10.1/arch/x86/kernel/process_64.c

/*switch_to(x,y) should switch tasks from x to y.*/

__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	......
	this_cpu_write(current_task, next_p);
	......
	return prev_p;
}
1
2
3
4
5
6
7
8
9
10
11
12
```
2.2 读取current_task
```
// linux-4.10.1/arch/x86/include/asm/current.h

static __always_inline struct task_struct *get_current(void)
{
	return this_cpu_read_stable(current_task);
}

#define current get_current()
1
2
3
4
5
6
7
8
```
```
// linux-4.10.1/arch/x86/include/asm/percpu.h

#define this_cpu_read_stable(var)	percpu_stable_op("mov", var)
1
2
3
```
this_cpu_read() 使 gcc 每次访问var时都重新加载 percpu 变量，而 this_cpu_read_stable() 允许编译器缓存var的值。
this_cpu_read_stable() 效率更高，如果能保证变量的值在各个 cpu 之间都有效（即任务迁移到其他 cpu 后读到的值不变），则可以使用它。当前的使用者包括get_current()和get_thread_info()，它们实际上都是用per-cpu变量实现的per-thread变量，因此在各自任务的运行期间是稳定的。
```
// linux-4.10.1/arch/x86/include/asm/percpu.h

#define percpu_stable_op(op, var)			\
({							\
	typeof(var) pfo_ret__;				\
	switch (sizeof(var)) {				\
	case 1:						\
		asm(op "b "__percpu_arg(P1)",%0"	\
		    : "=q" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 2:						\
		asm(op "w "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 4:						\
		asm(op "l "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 8:						\
		asm(op "q "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
	pfo_ret__;					\
})
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
```
对于x86_64架构，struct task_struct *current_task，指针的大小为8个字节，所以sizeof(var) = 8
```
asm(op "q "__percpu_arg(P1)",%0"	\
    : "=r" (pfo_ret__)			\
    : "p" (&(var)));			\
1
2
3
```
```
// linux-4.10.1/arch/x86/include/asm/percpu.h
#ifdef CONFIG_X86_64
#define __percpu_seg		gs
#define __percpu_mov_op		movq

#ifdef CONFIG_SMP
#define __percpu_prefix		"%%"__stringify(__percpu_seg)":"

#define __percpu_arg(x)		__percpu_prefix "%" #x
1
2
3
4
5
6
7
8
9
```
展开：
```
#define percpu_stable_op(op, var)
{
	//定义一个current_task类型的变量 pfo_ret__
	//等价于 struct task_struct * pfo_ret__
	typeof(current_task) pfo_ret__;
	
	asm("movq %%gs:%P1,%0"
		: "=r" (pfo_ret__) 		//输出部分
		: "p" (&(current_task)));  //输入部分
	
	pfo_ret__;	
}
1
2
3
4
5
6
7
8
9
10
11
12
```
宏展开后其实主要就是一条mov指令：以gs段寄存器的基地址加上%P1（current_task在per-cpu区域中的偏移，即输入列表中的第一个参数）得到内存地址，然后将该地址指向的内存空间里的值，移动到%0（由编译器分配的某个通用寄存器，例如%rax），即输出列表中的第一个参数 pfo_ret__。

简单说下内联汇编语法：
汇编指令中的数字和前缀%组合表示样板操作数，例如%0，%1等，用来依次指代后面的输出部分，输入部分等样板操作数。
gs是x86中的段寄存器，为了与%0，%1等区分开来，用两个%%来修饰寄存器，即%%gs。
%0表示pfo_ret__，r表示通用寄存器，=表示该操作数只写，一般用于输出操作数中。
%1表示&(current_task)，p表示内存地址。
gcc汇编语法：Using-Assembly-Language-with-C

简单点说就是将gs寄存器里的基地址与current_task的偏移相加得到一个内存地址，读取该地址处的值保存在pfo_ret__变量中，然后返回pfo_ret__变量。pfo_ret__变量里存放的值，就是当前cpu上正在执行的线程的struct task_struct的地址。这样我们就得到了current_task的值，即current。

注意：
gs寄存器中存放的是当前cpu的percpu内存块的起始地址：base_address。
current_task的地址表示current_task变量在任意percpu内存块中的位置：offset。
所以这两个地址相加：base_address + offset，得到的就是当前cpu的current_task变量的实际地址。

小结：x86_64架构下每个 CPU 当前运行进程的 task_struct 的指针current_task存放到 Per CPU 变量中; 可调用current（x86_64下实际调用的是 this_cpu_read_stable宏）进行读取。

2.3 struct thread_info

task_struct 和 struct thread_info都用来保存进程相关信息，struct thread_info与体系架构相关。然而不同的体系结构里，进程需要存储的信息不尽相同，linux使用task_struct存储通用的信息，将体系结构相关的部分存储在thread_info中。早期的Linux版本中thread_info 结构在进程内核栈中。

2.3.1 早期内核版本

早期版本的struct thread_info成员有struct task_struct *task，可以通过这个结构体来获取current。
```
// linux-2.6.32/arch/x86/include/asm/thread_info.h
struct thread_info {
	struct task_struct	*task;		/* main task structure */
	struct exec_domain	*exec_domain;	/* execution domain */
	__u32			flags;		/* low level flags */
	__u32			status;		/* thread synchronous flags */
	__u32			cpu;		/* current CPU */
	int			preempt_count;	/* 0 => preemptable,
						   <0 => BUG */
	mm_segment_t		addr_limit;
	struct restart_block    restart_block;
	void __user		*sysenter_return;
	......
	int			uaccess_err;
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
```
通过esp寄存器的值和内核栈大小，就可以方便地计算出内核栈所在内存区域的最低地址（将栈指针按THREAD_SIZE向下对齐），该地址其实就是进程对应的struct thread_info结构的地址。
```
/* how to get the current stack pointer from C */
register unsigned long current_stack_pointer asm("esp") __used;

/* how to get the thread information struct from C */
static inline struct thread_info *current_thread_info(void)
{
	return (struct thread_info *)
		(current_stack_pointer & ~(THREAD_SIZE - 1));
}
1
2
3
4
5
6
7
8
9
```
用 current_thread_info()->task 来获取 task_struct：
```
#define get_current() (current_thread_info()->task)
#define current get_current()
1
2
```
这时候x86就已经对current的获取进行了优化，采用之前所说的 Per CPU 变量来获取current。
```
// linux-2.6.32/arch/x86/include/asm/current.h

struct task_struct;

DECLARE_PER_CPU(struct task_struct *, current_task);

static __always_inline struct task_struct *get_current(void)
{
	return percpu_read_stable(current_task);
}

#define current get_current()
1
2
3
4
5
6
7
8
9
10
11
12
```
小结：在早期的内核中，比如2.6.32，通过current_thread_info()->task得到struct task_struct在x86上也是支持的。所以通过current_thread_info()->task和 Per CPU 变量都可以来获取current。

如图所示：

图片来自：极客时间趣谈 Linux 操作系统

 2.3.2 现在的内核版本
```
// linux-4.10.1/include/linux/sched.h

struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/*
	 * For reasons of header soup (see current_thread_info()), this
	 * must be the first element of task_struct.
	 */
	struct thread_info thread_info;
#endif
	......
1
2
3
4
5
6
7
8
9
10
11
```
现在较高的内核版本增加了一个新的配置选项CONFIG_THREAD_INFO_IN_TASK，允许把thread_info 数据结构存放在struct task_struct结构体中。通常在较高内核版本的Linux发行版中CONFIG_THREAD_INFO_IN_TASK都是默认打开的：
```
# cat /boot/config-5.4.18-35-generic | grep CONFIG_THREAD_INFO_IN_TASK
CONFIG_THREAD_INFO_IN_TASK=y
1
2
```
之所以把thread_info 数据结构从进程内核栈搬移到task_struct数据结构体中，是因为：
（1）在某些情况下发生内核栈溢出时，可以防止 thread_info 数据结构被破坏。
（2）即使内核栈的地址被泄露，这种方法也可以使针对进程的攻击变得更困难。

因此也将struct thread_info数据结构中的成员去掉了很多，比如struct task_struct *task，因此也不能通过struct thread_info数据结构来获取struct task_struct *task了。

现在的内核版本struct thread_info已经没有struct task_struct *task成员了，不能再通过current_thread_info()->task来获取current，对于x86_64必须通过之前所说的 Per CPU 变量来获取current。

对于x86_64：
```
// linux-4.10.1/arch/x86/include/asm/thread_info.h

struct thread_info {
	unsigned long		flags;		/* low level flags */
};
1
2
3
4
5
```
对于arm64：
```
// linux-4.10.1/arch/arm64/include/asm/thread_info.h

/*
 * low level task data that entry.S needs immediate access to.
 */
struct thread_info {
	unsigned long		flags;		/* low level flags */
	mm_segment_t		addr_limit;	/* address limit */
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
	u64			ttbr0;		/* saved TTBR0_EL1 */
#endif
	int			preempt_count;	/* 0 => preemptable, <0 => bug */
};
1
2
3
4
5
6
7
8
9
10
11
12
13
```
arm64架构寄存器比较多，是通过寄存器来获取current，在第三章节会介绍。

2.4 current_thread_info

如果配置了CONFIG_THREAD_INFO_IN_TASK选项，调用 current_thread_info获取当前struct thread_info，很简单就是将current指针转化为struct thread_info指针就可以了，因为struct thread_info是struct task_struct的第一个成员，两个结构体的首地址是一样的。

由于较高版本的内核，x86_64架构下struct thread_info成员较少，只有一个，CONFIG_THREAD_INFO_IN_TASK宏都是打开的，struct thread_info直接放在struct task_struct中。
```
// linux-4.10.1/arch/x86/include/asm/thread_info.h

struct thread_info {
	unsigned long		flags;		/* low level flags */
};
1
2
3
4
5
```
```
// linux-4.10.1/include/linux/thread_info.h

#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
 * For CONFIG_THREAD_INFO_IN_TASK kernels we need <asm/current.h> for the
 * definition of current, but for !CONFIG_THREAD_INFO_IN_TASK kernels,
 * including <asm/current.h> can cause a circular dependency on some platforms.
 */
#include <asm/current.h>
#define current_thread_info() ((struct thread_info *)current)
#endif
1
2
3
4
5
6
7
8
9
10
11
```
```
// linux-4.10.1/include/linux/sched.h

struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/*
	 * For reasons of header soup (see current_thread_info()), this
	 * must be the first element of task_struct.
	 */
	struct thread_info thread_info;
	......
}
1
2
3
4
5
6
7
8
9
10
11
```
三、ARM64 current 的实现

ARM64没有使用 per-cpu 变量，而是直接采用 sp_el0 寄存器（它是EL0的栈指针寄存器，并不是通用寄存器）来存储 current_task。这样获取current直接读取对应的寄存器便可以了。
```
/*
 * We don't use read_sysreg() as we want the compiler to cache the value where
 * possible.
 */
static __always_inline struct task_struct *get_current(void)
{
	unsigned long sp_el0;

	asm ("mrs %0, sp_el0" : "=r" (sp_el0));

	return (struct task_struct *)sp_el0;
}

#define current get_current()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
```
Armv8-A 架构定义了一组异常级别，EL0 到 EL3：
```
EL0 Applications.
EL1 OS kernel and associated functions that are typically described as privileged.
EL2 Hypervisor.
EL3 Secure monitor.
1
2
3
4
```
In addition, in AArch64 state, most register names include the lowest Exception level that can access the register as a suffix to the register name:
```
 <register_name>_ELx, where x is 0, 1, 2, or 3.
1
```
sp就是堆栈寄存器。在ARM64里，CPU可以运行在四个异常级别中，分别是el0、el1、el2、el3，el0对应用户空间，el1对应内核空间。sp_el0就是用户态使用的堆栈寄存器；内核运行在el1时使用自己的栈指针，sp_el0处于空闲状态，因此可以借用它来保存current。

看一下ARM64架构下进程切换函数 __switch_to ：
```
// linux-4.10.1/arch/arm64/kernel/process.c

/*
 * Thread switching.
 */
struct task_struct *__switch_to(struct task_struct *prev,
				struct task_struct *next)
{
	struct task_struct *last;

	.......

	/* the actual thread switch */
	last = cpu_switch_to(prev, next);

	return last;
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
```
```
// linux-4.10.1/arch/arm64/kernel/entry.S

/*
 * Register switch for AArch64. The callee-saved registers need to be saved
 * and restored. On entry:
 *   x0 = previous task_struct (must be preserved across the switch)
 *   x1 = next task_struct
 * Previous and next are guaranteed not to be the same.
 *
 */
ENTRY(cpu_switch_to)
	mov	x10, #THREAD_CPU_CONTEXT
	add	x8, x0, x10
	mov	x9, sp
	stp	x19, x20, [x8], #16		// store callee-saved registers
	stp	x21, x22, [x8], #16
	stp	x23, x24, [x8], #16
	stp	x25, x26, [x8], #16
	stp	x27, x28, [x8], #16
	stp	x29, x9, [x8], #16
	str	lr, [x8]
	add	x8, x1, x10
	ldp	x19, x20, [x8], #16		// restore callee-saved registers
	ldp	x21, x22, [x8], #16
	ldp	x23, x24, [x8], #16
	ldp	x25, x26, [x8], #16
	ldp	x27, x28, [x8], #16
	ldp	x29, x9, [x8], #16
	ldr	lr, [x8]
	mov	sp, x9
	msr	sp_el0, x1
	ret
ENDPROC(cpu_switch_to)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
```
简单介绍下上述汇编：

STP（stp）：Store Pair of Registers 根据 base register value和 immediate offset 计算地址，并将两个32位字或两个64位双字从两个寄存器存储到计算的地址。
简单点说就是将一对寄存器的值存储到内存地址中。
```
64-bit variant
STP <Xt1>, <Xt2>, [<Xn|SP>], #<imm>
1
2
```
LDP（ldp）：Load Pair of Registers 根据 base register value地址和 immediate offset，从内存中加载两个 32 位字或两个 64 位双字，并将它们写入两个寄存器。
简单点说就是将内存中的值加载到一对寄存器中。
```
64-bit variant
LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm>
1
2
```
这段汇编的意思是：
（1）x0 = previous task_struct，store callee-saved registers
将previous进程的x19 - x29、sp（先保存到x9中）、lr寄存器都存储到previous task_struct内偏移为THREAD_CPU_CONTEXT的区域中（而不是内核堆栈中）。
（2）x1 = next task_struct，restore callee-saved registers
将next进程的x19 - x29、sp（经由x9）、lr寄存器从next task_struct内偏移为THREAD_CPU_CONTEXT的区域中恢复。

我们看到了：
```
/* x1 = next task_struct */
msr	sp_el0, x1
1
2
```
其中x1就是指向next进程struct task_struct结构的指针，这条指令将next task_struct的地址存储在sp_el0寄存器中。在进程切换的时候，把将要运行进程的struct task_struct地址存储在sp_el0寄存器中，这样直接读取sp_el0寄存器就能获得当前正在运行进程的struct task_struct，即current。

总结

x86_64架构下每个 CPU 当前运行进程的 task_struct 的指针current_task存放到 per-cpu 变量中。
ARM64架构下每个 CPU 当前运行进程的 task_struct 的指针current_task存放到 sp_el0 寄存器中。

任务切换的时候必须要更新一下current，指向 next task_struct。

参考链接

Linux 4.10.0
Linux 2.6.32

ARM64官方文档

https://zhuanlan.zhihu.com/p/340985476
https://www.cnblogs.com/crybaby/p/14082593.html

https://blog.csdn.net/longwang155069/article/details/104346778
https://zhuanlan.zhihu.com/p/296750228
相关阅读:
vue+Fullcalendar
【GlobalMapper精品教程】013：矢量点图层的创建及数字化案例操作
 代码随想录Day_55打卡
 异步和多线程，简单说，多线程是实现异步的方法之一
 软考 --- 数据库（4）SQL语句
 【一起来用C++】————（1）类的练习案例（多文件编写）
SSM整合：SSM+VUE
成本高、落地难、见效慢，开源安全怎么办？
微信小程序的疫苗接种预约设计与实现vue+uniapp
【Nginx】基础概念和核心配置块
原文地址：https://blog.csdn.net/weixin_45030965/article/details/126508703

文章目录

前言

一、current简介

二、x86_64 current 的实现

2.1 current_task 的实现

2.2 读取current_task

2.3 struct thread_info

2.3.1 早期内核版本

2.3.2 现在的内核版本

2.4 current_thread_info

三、ARM64 current 的实现

总结

参考链接