linux内核中断管理分为上下半部机制(top half,bottom half)。中断上半部主要关注中断的响应,而把具体需要关注的任务放到中断下半部中来处理,其中网卡MAC控制器中断的处理是使用软中断来进行下半部处理的典型。
硬件中断属于上半部的范畴,而软中断,tasklet和工作队列等为下半部机制。中断处理程序ISR中不允许进行睡眠,同理在软中断中也是不允许进行睡眠。
1、SoftIRQ软中断
软中断是linux内核很早引入的机制。软中断是预留给系统中对时间要求最为严格和最重要的下半部使用的,而且目前驱动中只有块设备和网络子系统使用了软中断。
系统静态定义了若干种软中断类型,并且linux内核开发者不希望用户再扩充新的软中断类型,如有需要,建议使用tasklet机制。
1.1、数据结构
/* PLEASE, avoid to allocate new softirqs, if you need not _really_ highfrequency threaded job scheduling. For almost all the purposestasklets are more than enough. F.e. all serial device BHs etal. should be converted to tasklets, not to softirqs.*/enum
{HI_SOFTIRQ=0,TIMER_SOFTIRQ,NET_TX_SOFTIRQ,NET_RX_SOFTIRQ,BLOCK_SOFTIRQ,BLOCK_IOPOLL_SOFTIRQ,TASKLET_SOFTIRQ,SCHED_SOFTIRQ,HRTIMER_SOFTIRQ,RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */NR_SOFTIRQS
};
通过枚举类型来静态声明软中断,并且每一种软中断都使用索引来表示一种相对的优先级,索引号越小,中断优先级越高,并在一轮软中断中得到优先执行。

/* One softirq vector entry: the handler to call for that softirq. */
struct softirq_action
{
	void (*action)(struct softirq_action *);
};

/* Handler table with one slot per softirq type, indexed by softirq number. */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

/*
 * Per-CPU interrupt state. __softirq_pending is a bitmask in which
 * bit nr is set while softirq nr is pending on that CPU.
 */
typedef struct {
	unsigned int __softirq_pending;
#ifdef CONFIG_SMP
	unsigned int ipi_irqs[NR_IPI];
#endif
} ____cacheline_aligned irq_cpustat_t;

/* One irq_cpustat_t per CPU, indexed by CPU number. */
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
1.2、接口说明
/*注册软中断,nr为软中断序号*/
void open_softirq(int nr, void (*action)(struct softirq_action *))
{softirq_vec[nr].action = action;
}/*主动触发一个软中断*/
void raise_softirq(unsigned int nr)
{unsigned long flags;local_irq_save(flags); /*关本地中断*/raise_softirq_irqoff(nr);local_irq_restore(flags); /*开本地中断*/
}/** This function must run with irqs disabled!*/
inline void raise_softirq_irqoff(unsigned int nr)
{__raise_softirq_irqoff(nr);/** If we're in an interrupt or softirq, we're done* (this also catches softirq-disabled code). We will* actually run the softirq once we return from* the irq or softirq.** Otherwise we wake up ksoftirqd to make sure we* schedule the softirq soon.*/if (!in_interrupt()) /*非中断上下文即运行在进程上下文*/wakeup_softirqd(); /*唤醒ksoftirqd%u*/
}raise_softirq和raise_softirq_irqoff差别在于是否关闭本地中断。void __raise_softirq_irqoff(unsigned int nr)
{trace_softirq_raise(nr);or_softirq_pending(1UL << nr);
}#define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
#define local_softirq_pending() __IRQ_STAT(smp_processor_id(), __softirq_pending)#define set_softirq_pending(x) (local_softirq_pending() = (x))
#define or_softirq_pending(x) (local_softirq_pending() |= (x))raise_softirq:主动设置softirq处于pending状态,如果当前处于非中断上下文时(非硬中断也非软中断上下文)唤醒ksoftirqd%u内核线程来处理软中断事件。
中断退出时,irq_exit()函数会检查当前是否有pending等待的软中断。
/** Exit an interrupt context. Process softirqs if needed and possible:*/
void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLEDlocal_irq_disable();
#elseWARN_ON_ONCE(!irqs_disabled());
#endifaccount_irq_exit_time(current);/*preempt_count上退出硬中断*/preempt_count_sub(HARDIRQ_OFFSET);if (!in_interrupt()/*非中断上下文*/ && local_softirq_pending()/*存在本地软中断pending*/)invoke_softirq();tick_irq_exit();rcu_irq_exit();trace_hardirq_exit(); /* must be last! */
}
/*
 * Interrupt context = hard IRQ + softirq + NMI (non-maskable interrupt):
 * non-zero when any of those fields of preempt_count is non-zero.
 */
#define in_interrupt() (irq_count())
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK))
preempt_count_sub(HARDIRQ_OFFSET)将preempt_count计数减去HARDIRQ_OFFSET,表明退出硬中断上下文。
注意这里有一个判断条件为!in_interrupt(),也就是说,中断退出时不能处于硬件中断上下文(Hardirq context)和软件中断上下文(Softirq context)中。
硬件中断处理过程中一般都是关中断的,中断退出时也就退出了硬件中断上下文,因此该条件会满足。还有一个场景,如果本次中断点发生在一个软中断处理过程中,那么中断退出时会返回到软中断上下文中,因此这种情况下不允许重新调度软中断,因为软中断在一个CPU上总是串行执行。
/*
 * Process pending softirqs: run them directly unless force_irqthreads
 * is set, in which case the work is handed to the ksoftirqd thread.
 */
static inline void invoke_softirq(void)
{
	/*
	 * NOTE(review): per the original note, force_irqthreads is related to
	 * CONFIG_IRQ_FORCED_THREADING=y; its definition is not shown here, so
	 * confirm how it becomes true. When it is true, softirqs are always
	 * deferred to ksoftirqd.
	 */
	if (!force_irqthreads) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
		/*
		 * We can safely execute softirq on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage.
		 */
		__do_softirq();
#else
		/*
		 * Otherwise, irq_exit() is called on the task stack that can
		 * be potentially deep already. So call softirq in its own stack
		 * to prevent from any overrun.
		 */
		do_softirq_own_stack();
#endif
	} else {
		/* Wake the ksoftirqd kernel thread to process the softirqs. */
		wakeup_softirqd();
	}
}
/* This variant does not switch stacks; it simply calls __do_softirq(). */
static inline void do_softirq_own_stack(void)
{
	__do_softirq();
}

/*
 * Limits used by __do_softirq(): a time budget of 2 ms (converted to
 * jiffies) and a restart counter starting at 10.
 */
#define MAX_SOFTIRQ_TIME msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10
/*
 * Run the softirqs pending on the local CPU.
 *
 * Handlers run with local interrupts enabled, lowest softirq number first.
 * If new softirqs were raised meanwhile, the loop restarts as long as the
 * time budget is not used up, no reschedule is needed and the restart
 * counter has not run out; otherwise ksoftirqd is woken to take over.
 */
asmlinkage __visible void __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME /* 2 ms */;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART /* 10 */;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as current task context is borrowed for the
	 * softirq. A softirq handler such as network RX might set PF_MEMALLOC
	 * again if the socket is related to swap
	 */
	/*
	 * NOTE(review): the original author adds that PF_MEMALLOC is mainly
	 * used in two places, the direct memory compaction path and the
	 * network subsystem when skbuff allocation fails. This is not
	 * verifiable from this excerpt.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();
	account_irq_enter_time(current);

	/*
	 * Add SOFTIRQ_OFFSET to the softirq field of preempt_count, marking
	 * that we are now in softirq context.
	 */
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	in_hardirq = lockdep_softirq_start();

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();	/* handlers run with local interrupts enabled */

	h = softirq_vec;

	while ((softirq_bit = ffs(pending) /* ffs: find first set bit */)) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;	/* advance to this softirq's slot in softirq_vec */

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);		/* run the registered handler */
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			/* The handler left preempt_count changed: report and repair. */
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	rcu_bh_qs();
	local_irq_disable();	/* disable local interrupts again */

	/* Check whether more softirqs were raised while we were running. */
	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end)	/* time budget (2 ms) not used up */
		    && !need_resched()		/* no reschedule requested */
		    && --max_restart)		/* restart counter not exhausted */
			goto restart;		/* keep processing */

		/* Otherwise leave the remaining work to the ksoftirqd thread. */
		wakeup_softirqd();
	}

	lockdep_softirq_end(in_hardirq);
	account_irq_exit_time(current);
	/* Drop SOFTIRQ_OFFSET again: we are leaving softirq context. */
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}
raise_softirq:标记本地软中断处于pending状态;若当前处于非中断上下文(!in_interrupt()),则唤醒内核线程ksoftirqd%u来执行软中断。
__do_softirq:立即执行软中断;如果处理完一轮后又有新的软中断处于pending状态,且执行时间已超过2ms、或者需要进行调度、或者重启次数已达10次,则唤醒内核线程ksoftirqd%u来执行剩余的软中断。
2、tasklet
tasklet是利用软中断实现的一种下半部机制,本质上是软中断的一个变种,运行在软中断上下文。
2.1、数据结构
tasklet由tasklet_struct数据结构来描述:
/* Tasklets --- multithreaded analogue of BHs.

   Main feature differing them of generic softirqs: tasklet
   is running only on one CPU simultaneously.

   Main feature differing them of BHs: different tasklets
   may be run simultaneously on different CPUs.

   Properties:
   * If tasklet_schedule() is called, then tasklet is guaranteed
     to be executed on some cpu at least once after this.
   * If the tasklet is already scheduled, but its execution is still not
     started, it will be executed only once.
   * If this tasklet is already running on another CPU (or schedule is called
     from tasklet itself), it is rescheduled for later.
   * Tasklet is strictly serialized wrt itself, but not
     wrt another tasklets. If client needs some intertask synchronization,
     he makes it with spinlocks.
 */
struct tasklet_struct
{
	struct tasklet_struct *next;	/* next tasklet in the per-CPU list */
	/*
	 * TASKLET_STATE_SCHED: the tasklet has been scheduled and is waiting to run;
	 * TASKLET_STATE_RUN:   the tasklet is currently running.
	 */
	unsigned long state;
	/* 0 means the tasklet is enabled; non-zero means it is disabled and must not run. */
	atomic_t count;
	void (*func)(unsigned long);	/* handler, called as func(data) */
	unsigned long data;		/* argument passed to func */
};

/*
 * Tasklets
 */
/* Singly linked list of tasklets; tail points at the last next pointer (or at head). */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

/* Per-CPU lists: one serviced by TASKLET_SOFTIRQ, one by HI_SOFTIRQ. */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
每个cpu维护两个tasklet链表,一个用于普通优先级的tasklet_vec,另一个用于高优先级的tasklet_hi_vec,它们都是per-cpu变量。
/*
 * Initialise the per-CPU tasklet lists and register the two softirq
 * handlers that service them.
 */
void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		/* Start with empty lists: tail points at head (tail = &head). */
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);	/* tasklet_action services tasklet_vec */
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);	/* tasklet_hi_action services tasklet_hi_vec */
}
TASKLET_SOFTIRQ的回调函数为tasklet_action,HI_SOFTIRQ的回调函数为tasklet_hi_action。
2.2、接口说明
定义tasklet,可以静态声明,也可以动态初始化。
静态声明处于激活状态的tasklet:
#define DECLARE_TASKLET(name, func, data) \
struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data }静态申明处于非激活状态的tasklet:
#define DECLARE_TASKLET_DISABLED(name, func, data) \
struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data }enumvoid tasklet_init(struct tasklet_struct *t,void (*func)(unsigned long), unsigned long data)
{t->next = NULL;t->state = 0;atomic_set(&t->count, 0); /*初始化为0,激活状态*/t->func = func;t->data = data;
}
调度tasklet
/*
 * Schedule a tasklet: queue t on the per-CPU tasklet_vec list, whose
 * entries are then executed in order.
 * The tasklet is queued only if TASKLET_STATE_SCHED was not already set,
 * so a tasklet that is already scheduled is not queued twice.
 */
static inline void tasklet_schedule(struct tasklet_struct *t)
{
	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
		__tasklet_schedule(t);
}
test_and_set_bit原子地将t->state中的TASKLET_STATE_SCHED位置1,然后返回该位的旧值。
返回true说明该tasklet已经被挂入tasklet_vec上,返回false则需要将其加入tasklet_vec上。
/*
 * Append t to this CPU's tasklet_vec list and raise TASKLET_SOFTIRQ.
 * Initial list state: per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
 */
void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);		/* disable local interrupts */
	t->next = NULL;			/* t becomes the last element */
	/*
	 * Store t through tail: the first time this sets head, afterwards it
	 * sets the previous element's next, so all tasklets get chained up.
	 */
	*__this_cpu_read(tasklet_vec.tail) = t;
	__this_cpu_write(tasklet_vec.tail, &(t->next));	/* tail now points at t->next */
	raise_softirq_irqoff(TASKLET_SOFTIRQ);		/* raise the tasklet softirq */
	local_irq_restore(flags);	/* restore local interrupt state */
}

/* Same as tasklet_schedule(), but queues on the high-priority list. */
static inline void tasklet_hi_schedule(struct tasklet_struct *t)
{
	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
		__tasklet_hi_schedule(t);
}

/* Append t to this CPU's tasklet_hi_vec list and raise HI_SOFTIRQ. */
void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__this_cpu_read(tasklet_hi_vec.tail) = t;
	__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

/* The softirq handler that executes tasklets: */
/*
 * Handler that runs the tasklets queued on this CPU's tasklet_vec list.
 * The whole list is detached with interrupts disabled, then each tasklet
 * is run; tasklets that cannot run now are put back on the list.
 */
static __latent_entropy void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();				/* disable local interrupts */
	list = __this_cpu_read(tasklet_vec.head);	/* list takes over the queued tasklets */
	__this_cpu_write(tasklet_vec.head, NULL);	/* head = NULL */
	/*
	 * tail points back at &head, so the next store through tail
	 * actually updates head.
	 */
	__this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
	local_irq_enable();				/* enable local interrupts */

	/* Walk the detached list and run each tasklet. */
	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		/* Sets TASKLET_STATE_RUN; succeeds only if it was not already set. */
		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {	/* count == 0: tasklet is enabled */
				/*
				 * Clear TASKLET_STATE_SCHED; it is a BUG() if
				 * the flag was not set.
				 */
				if (!test_and_clear_bit(TASKLET_STATE_SCHED,
							&t->state))
					BUG();
				t->func(t->data);	/* run the tasklet's handler */
				tasklet_unlock(t);	/* clear TASKLET_STATE_RUN */
				continue;
			}
			tasklet_unlock(t);
		}

		/*
		 * tasklet_trylock() failed or count is non-zero: put t back
		 * on tasklet_vec and raise the softirq so it is retried later.
		 */
		local_irq_disable();
		t->next = NULL;
		*__this_cpu_read(tasklet_vec.tail) = t;		/* link t at the end of the list */
		__this_cpu_write(tasklet_vec.tail, &(t->next));	/* tail now points at t->next */
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);	/* mark the tasklet softirq pending */
		local_irq_enable();
	}
}
/*
 * Try to mark the tasklet as running: atomically set TASKLET_STATE_RUN.
 * Returns non-zero if the flag was previously clear, 0 if it was already set.
 */
static inline int tasklet_trylock(struct tasklet_struct *t)
{
	return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
}

/* Clear TASKLET_STATE_RUN. */
static inline void tasklet_unlock(struct tasklet_struct *t)
{
	smp_mb__before_atomic();
	clear_bit(TASKLET_STATE_RUN, &(t)->state);
}

/* Busy-wait until TASKLET_STATE_RUN is clear. */
static inline void tasklet_unlock_wait(struct tasklet_struct *t)
{
	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
}

/*
 * Handler for the high-priority list: same procedure as tasklet_action(),
 * but operating on tasklet_hi_vec and re-raising HI_SOFTIRQ.
 */
static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	/* Detach the whole list and reset it to empty (tail = &head). */
	local_irq_disable();
	list = __this_cpu_read(tasklet_hi_vec.head);
	__this_cpu_write(tasklet_hi_vec.head, NULL);
	__this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED,
							&t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		/* Could not run now: put t back on the list and retry later. */
		local_irq_disable();
		t->next = NULL;
		*__this_cpu_read(tasklet_hi_vec.tail) = t;
		__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}

/*
 * Disable a tasklet: increment its count, then wait until
 * TASKLET_STATE_RUN is clear.
 */
static inline void tasklet_disable(struct tasklet_struct *t)
{
	tasklet_disable_nosync(t);
	tasklet_unlock_wait(t);
	smp_mb();
}

/* Disable a tasklet by incrementing its count, without waiting. */
static inline void tasklet_disable_nosync(struct tasklet_struct *t)
{
	atomic_inc(&t->count);
	smp_mb__after_atomic();
}

/* Decrement the tasklet's count; the tasklet is enabled again once it reaches 0. */
static inline void tasklet_enable(struct tasklet_struct *t)
{
	smp_mb__before_atomic();
	atomic_dec(&t->count);
}
3、local_bh_disable/local_bh_enable
local_bh_disable()和local_bh_enable()是内核中提供的关闭软中断的锁机制,它们组成的临界区禁止本地CPU在中断返回前夕执行软中断,这个临界区简称为BH临界区(bottom half critical region).
/*
 * Amount added to preempt_count by local_bh_disable().
 * The trailing 0x200 is the original author's note; SOFTIRQ_OFFSET itself
 * is not shown in this excerpt.
 */
#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET ) //0x200

/* Disable bottom halves: add SOFTIRQ_DISABLE_OFFSET to preempt_count. */
static inline void local_bh_disable(void)
{
	__local_bh_disable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
}

/* Add cnt to preempt_count. The ip argument is not used in this variant. */
static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	preempt_count_add(cnt);
	/* The original note relates barrier() to compiler optimisation; presumably a compiler barrier (confirm). */
	barrier();
}
local_bh_disable把当前进程的preempt_count计数加上SOFTIRQ_DISABLE_OFFSET,此后in_interrupt()返回非0(内核把这种状态同样视为处于软中断上下文),因此临界区内发生的中断在irq_exit()退出时不会执行软中断。
/* Re-enable bottom halves: undo local_bh_disable(). */
static inline void local_bh_enable(void)
{
	__local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
}

/*
 * Subtract cnt from preempt_count and run any pending softirqs.
 * ip:  caller address, passed to trace_softirqs_on()
 * cnt: amount previously added by __local_bh_disable_ip()
 */
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	/* Warn once if called in hard-IRQ context or with interrupts disabled. */
	WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	/*
	 * Subtract cnt - 1 only: the remaining 1 keeps preemption disabled on
	 * this CPU, so that do_softirq() below is not preempted by a
	 * higher-priority task or migrated to another CPU.
	 */
	preempt_count_sub(cnt - 1);

	/* Not in interrupt context and a softirq is pending on this CPU. */
	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirq if any pending. And do it in its own stack
		 * as we may be calling this deep in a task call stack already.
		 */
		do_softirq();
	}

	preempt_count_dec();	/* drop the last count that was kept above */
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	/*
	 * A preemption request from a higher-priority task may have been
	 * missed while softirqs were being processed; check again here.
	 */
	preempt_check_resched();
}