进程描述符task_struct
- 进程描述符(struct task_struct)
- pid与tgid
- 进程id编号分配规则
- 内存管理mm_struct
- 进程与文件,文件系统
进程,线程创建的本质
- clone函数原型
- 线程创建的实现
- 进程创建的实现
总结
进程描述符task_struct
进程描述符(struct task_struct)
task_struct称为进程描述符结构,该结构定义在<include/linux/sched.h>文件中。进程描述符中包含一个具体进程的所有信息
进程描述符中包含的数据能完整地描述一个正在执行的程序:它打开的文件,进程的地址空间,挂起的信号,进程的状态等
struct task_struct {volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */void *stack;atomic_t usage;unsigned int flags; /* per process flags, defined below */unsigned int ptrace;#ifdef CONFIG_SMPstruct llist_node wake_entry;int on_cpu;
#endifint on_rq;int prio, static_prio, normal_prio;unsigned int rt_priority;const struct sched_class *sched_class;struct sched_entity se;struct sched_rt_entity rt;
#ifdef CONFIG_CGROUP_SCHEDstruct task_group *sched_task_group;
#endif#ifdef CONFIG_PREEMPT_NOTIFIERS/* list of struct preempt_notifier: */struct hlist_head preempt_notifiers;
#endif/** fpu_counter contains the number of consecutive context switches* that the FPU is used. If this is over a threshold, the lazy fpu* saving becomes unlazy to save the trap. This is an unsigned char* so that after 256 times the counter wraps and the behavior turns* lazy again; this to deal with bursty apps that only use FPU for* a short time*/unsigned char fpu_counter;
#ifdef CONFIG_BLK_DEV_IO_TRACEunsigned int btrace_seq;
#endifunsigned int policy;cpumask_t cpus_allowed;#ifdef CONFIG_PREEMPT_RCUint rcu_read_lock_nesting;char rcu_read_unlock_special;struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCUstruct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifdef CONFIG_RCU_BOOSTstruct rt_mutex *rcu_boost_mutex;
#endif /* #ifdef CONFIG_RCU_BOOST */#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)struct sched_info sched_info;
#endifstruct list_head tasks;
#ifdef CONFIG_SMPstruct plist_node pushable_tasks;
#endifstruct mm_struct *mm, *active_mm;
#ifdef CONFIG_COMPAT_BRKunsigned brk_randomized:1;
#endif
#if defined(SPLIT_RSS_COUNTING)struct task_rss_stat rss_stat;
#endif
/* task state */int exit_state;int exit_code, exit_signal;int pdeath_signal; /* The signal sent when the parent dies */unsigned int jobctl; /* JOBCTL_*, siglock protected *//* ??? */unsigned int personality;unsigned did_exec:1;unsigned in_execve:1; /* Tell the LSMs that the process is doing an* execve */unsigned in_iowait:1;/* Revert to default priority/policy when forking */unsigned sched_reset_on_fork:1;unsigned sched_contributes_to_load:1;#ifdef CONFIG_GENERIC_HARDIRQS/* IRQ handler threads */unsigned irq_thread:1;
#endifpid_t pid;pid_t tgid;#ifdef CONFIG_CC_STACKPROTECTOR/* Canary value for the -fstack-protector gcc feature */unsigned long stack_canary;
#endif/* * pointers to (original) parent process, youngest child, younger sibling,* older sibling, respectively. (p->father can be replaced with * p->real_parent->pid)*/struct task_struct __rcu *real_parent; /* real parent process */struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports *//** children/sibling forms the list of my natural children*/struct list_head children; /* list of my children */struct list_head sibling; /* linkage in my parent's children list */struct task_struct *group_leader; /* threadgroup leader *//** ptraced is the list of tasks this task is using ptrace on.* This includes both natural children and PTRACE_ATTACH targets.* p->ptrace_entry is p's link on the p->parent->ptraced list.*/struct list_head ptraced;struct list_head ptrace_entry;/* PID/PID hash table linkage. */struct pid_link pids[PIDTYPE_MAX];struct list_head thread_group;struct completion *vfork_done; /* for vfork() */int __user *set_child_tid; /* CLONE_CHILD_SETTID */int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */cputime_t utime, stime, utimescaled, stimescaled;cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTINGcputime_t prev_utime, prev_stime;
#endifunsigned long nvcsw, nivcsw; /* context switch counts */struct timespec start_time; /* monotonic time */struct timespec real_start_time; /* boot based time */
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */unsigned long min_flt, maj_flt;struct task_cputime cputime_expires;struct list_head cpu_timers[3];/* process credentials */const struct cred __rcu *real_cred; /* objective and real subjective task* credentials (COW) */const struct cred __rcu *cred; /* effective (overridable) subjective task* credentials (COW) */struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */char comm[TASK_COMM_LEN]; /* executable name excluding path- access with [gs]et_task_comm (which lockit with task_lock())- initialized normally by setup_new_exec */
/* file system info */int link_count, total_link_count;
#ifdef CONFIG_SYSVIPC
/* ipc stuff */struct sysv_sem sysvsem;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */unsigned long last_switch_count;
#endif
/* CPU-specific state of this task */struct thread_struct thread;
/* filesystem information */struct fs_struct *fs;
/* open file information */struct files_struct *files;
/* namespaces */struct nsproxy *nsproxy;
/* signal handlers */struct signal_struct *signal;struct sighand_struct *sighand;sigset_t blocked, real_blocked;sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */struct sigpending pending;unsigned long sas_ss_sp;size_t sas_ss_size;int (*notifier)(void *priv);void *notifier_data;sigset_t *notifier_mask;struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALLuid_t loginuid;unsigned int sessionid;
#endifseccomp_t seccomp;/* Thread group tracking */u32 parent_exec_id;u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,* mempolicy */spinlock_t alloc_lock;/* Protection of the PI data structures: */raw_spinlock_t pi_lock;#ifdef CONFIG_RT_MUTEXES/* PI waiters blocked on a rt_mutex held by this task */struct plist_head pi_waiters;/* Deadlock detection and priority inheritance handling */struct rt_mutex_waiter *pi_blocked_on;
#endif#ifdef CONFIG_DEBUG_MUTEXES/* mutex deadlock detection */struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_TRACE_IRQFLAGSunsigned int irq_events;unsigned long hardirq_enable_ip;unsigned long hardirq_disable_ip;unsigned int hardirq_enable_event;unsigned int hardirq_disable_event;int hardirqs_enabled;int hardirq_context;unsigned long softirq_disable_ip;unsigned long softirq_enable_ip;unsigned int softirq_disable_event;unsigned int softirq_enable_event;int softirqs_enabled;int softirq_context;
#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48ULu64 curr_chain_key;int lockdep_depth;unsigned int lockdep_recursion;struct held_lock held_locks[MAX_LOCK_DEPTH];gfp_t lockdep_reclaim_gfp;
#endif/* journalling filesystem info */void *journal_info;/* stacked block device info */struct bio_list *bio_list;#ifdef CONFIG_BLOCK
/* stack plugging */struct blk_plug *plug;
#endif/* VM state */struct reclaim_state *reclaim_state;struct backing_dev_info *backing_dev_info;struct io_context *io_context;unsigned long ptrace_message;siginfo_t *last_siginfo; /* For ptrace use. */struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)u64 acct_rss_mem1; /* accumulated rss usage */u64 acct_vm_mem1; /* accumulated virtual memory usage */cputime_t acct_timexpd; /* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETSnodemask_t mems_allowed; /* Protected by alloc_lock */seqcount_t mems_allowed_seq; /* Seqence no to catch updates */int cpuset_mem_spread_rotor;int cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS/* Control Group info protected by css_set_lock */struct css_set __rcu *cgroups;/* cg_list protected by css_set_lock and tsk->alloc_lock */struct list_head cg_list;
#endif
#ifdef CONFIG_FUTEXstruct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPATstruct compat_robust_list_head __user *compat_robust_list;
#endifstruct list_head pi_state_list;struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_PERF_EVENTSstruct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];struct mutex perf_event_mutex;struct list_head perf_event_list;
#endif
#ifdef CONFIG_NUMAstruct mempolicy *mempolicy; /* Protected by alloc_lock */short il_next;short pref_node_fork;
#endifstruct rcu_head rcu;/** cache last used pipe for splice*/struct pipe_inode_info *splice_pipe;
#ifdef CONFIG_TASK_DELAY_ACCTstruct task_delay_info *delays;
#endif
#ifdef CONFIG_FAULT_INJECTIONint make_it_fail;
#endif/** when (nr_dirtied >= nr_dirtied_pause), it's time to call* balance_dirty_pages() for some dirty throttling pause*/int nr_dirtied;int nr_dirtied_pause;unsigned long dirty_paused_when; /* start of a write-and-pause period */#ifdef CONFIG_LATENCYTOPint latency_record_count;struct latency_record latency_record[LT_SAVECOUNT];
#endif/** time slack values; these are used to round up poll() and* select() etc timeout values. These are in nanoseconds.*/unsigned long timer_slack_ns;unsigned long default_timer_slack_ns;struct list_head *scm_work_list;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER/* Index of current stored address in ret_stack */int curr_ret_stack;/* Stack of return addresses for return function tracing */struct ftrace_ret_stack *ret_stack;/* time stamp for last schedule */unsigned long long ftrace_timestamp;/** Number of functions that haven't been traced* because of depth overrun.*/atomic_t trace_overrun;/* Pause for the tracing */atomic_t tracing_graph_pause;
#endif
#ifdef CONFIG_TRACING/* state flags for use by tracers */unsigned long trace;/* bitmask and counter of trace recursion */unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */struct memcg_batch_info {int do_batch; /* incremented when batch uncharge started */struct mem_cgroup *memcg; /* target memcg of uncharge */unsigned long nr_pages; /* uncharged usage */unsigned long memsw_nr_pages; /* uncharged mem+swap usage */} memcg_batch;
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINTatomic_t ptrace_bp_refcnt;
#endif
};
pid与tgid
tgid全名thread group ID,一个内部有多线程的进程,进程中每个线程的id都不一样,但是对外表现出同一个进程整体
struct task_struct{pid_t pid;//进程的唯一标识pid_t tgid;// 线程组的领头线程的pid成员的值
};
进程id编号分配规则
Linux 内核限制进程号需小于等于 32767。新进程创建时,内核会按顺序将下一个可用的进程号分配给其使用。每当进程号达到 32767 的限制时,内核将重置进程号计数器,以便从小整数开始分配。
一旦进程号达到 32767,会将进程号计数器重置为 300,而不是 1。之所以如此,是因为低数值的进程号为系统进程和守护进程所长期占用,在此范围内搜索尚未使用的进程号只会是浪费时间。
内存管理mm_struct
struct task_struct{struct mm_struct* mm;
}
每个进程都有自己独立的虚拟地址空间,使用mm_struct结构体来管理内存,这里的mm指针指向了mm_struct结构体,包含了内存资源的页表,内存映射等
struct mm_struct{struct vm_area_struct* mmap;struct re_root mm_rb;//...pgd_t* pgd; }
进程与文件,文件系统
task_struct与文件相关的字段最常用的下面这两个
struct task_struct{//文件系统的信息的指针,包含了进程运行的目录信息struct fs_struct* fs;//打开的文件描述符资源表struct files_struct* files;
}
进程,线程创建的本质
fork()和pthread_create()函数最后都会进入clone()系统调用
clone函数原型
- fn:表示clone生成的子进程的起始调用函数,参数由第四个参数arg指定
- stack:表示生成的子进程的栈空间
- flags:关键参数,用于区分生成的子进程与父进程如何共享资源(内存,打开文件描述符等)
- 剩下的参数与线程实现有关
int clone(int (*fn)(void *), void *stack, int flags, void *arg, .../* pid_t *parent_tid, void *tls, pid_t *child_tid */ );
线程创建的实现pthread_create()
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <pthread.h>void* run(void* arg){}int main()
{pthread_t t1;pthread_create(&t1, 0, &run, 0);pthread_join(t1, 0);return 0;
}
此时clone系统调用的flags=CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | …
标志 | 含义 |
---|---|
CLONE_VM | 共享虚拟内存 |
CLONE_FS | 共享文件与系统相关的属性 |
CLONE_FILES | 共享打开的文件描述符 |
CLONE_SIGHAND | 共享对信号的处置 |
CLONE_THREAD | 置于父进程所属的线程组中 |
进程创建的实现fork()
#include <sys/wait.h>
#include <unistd.h>int main()
{pid_t pid;pid = fork();if(pid == 0){//此处是子进程的代码分支}else if(pid > 0){//此处是父进程的代码分支}return 0;
}
此时clone系统调用的flags=CLONE_SIGCHLD | …
本质:不共享资源,使用cow,任何一个修改都会造成分裂
标志 | 含义 |
---|---|
CLONE_SIGCHLD | 接收子进程退出的信号 |
总结
- fork()和pthread_create()创建进程或者线程都会调用clone()系统调用
- pthread_create()调用clone()时传入的flags参数会设置共享虚拟内存,共享文件与系统相关的属性,共享打开的文件描述符,共享对信号的处置,置于父进程所属的线程组中
- fork()调用clone()时传入的flags参数只会设置接收子进程退出的信号
- 在内核态中没有进程和线程的概念,内核不会区分进程和线程的操作