hmp.c
  1. /*
  2. * Copyright (C) 2016 MediaTek Inc.
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License version 2 as
  6. * published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. * See http://www.gnu.org/licenses/gpl-2.0.html for more details.
  12. */
  13. #include <linux/sched.h>
  14. #include <linux/stat.h>
  15. #include <linux/math64.h>
  16. #include <linux/kobject.h>
  17. #include <linux/sysfs.h>
  18. #include <trace/events/sched.h>
  19. #include <linux/stop_machine.h>
  20. #include <linux/cpumask.h>
  21. #include <linux/list_sort.h>
  22. /*
  23. * Heterogeneous multiprocessor (HMP) optimizations
  24. *
  25. * The cpu types are distinguished using a list of hmp_domains
  26. * which each represent one cpu type using a cpumask.
  27. * The list is assumed ordered by compute capacity with the
  28. * fastest domain first.
  29. */
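/*
 * Example layout (illustrative only): on a two-cluster big.LITTLE SoC the
 * list holds one hmp_domain for the big cluster at the head, followed by
 * one hmp_domain for the LITTLE cluster.
 */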
  30. DEFINE_PER_CPU(struct hmp_domain *, hmp_cpu_domain);
  31. /* Setup hmp_domains */
  32. void hmp_cpu_mask_setup(void)
  33. {
  34. struct hmp_domain *domain;
  35. struct list_head *pos;
  36. int cpu;
  37. pr_info("Initializing HMP scheduler:\n");
  38. /* Initialize hmp_domains using platform code */
  39. if (list_empty(&hmp_domains)) {
  40. pr_info("HMP domain list is empty!\n");
  41. return;
  42. }
  43. /* Print hmp_domains */
  44. list_for_each(pos, &hmp_domains) {
  45. domain = list_entry(pos, struct hmp_domain, hmp_domains);
  46. for_each_cpu(cpu, &domain->possible_cpus)
  47. per_cpu(hmp_cpu_domain, cpu) = domain;
  48. }
  49. pr_info("Initializing HMP scheduler done\n");
  50. }
  51. /*
  52. * Heterogeneous CPU capacity compare function.
  53. * Only inspect the lowest CPU id in each domain;
  54. * CPUs in the same domain are assumed to have the same capacity.
  55. */
  56. struct cluster_info {
  57. struct hmp_domain *hmpd;
  58. unsigned long cpu_perf;
  59. int cpu;
  60. };
  61. static inline void fillin_cluster(struct cluster_info *cinfo,
  62. struct hmp_domain *hmpd)
  63. {
  64. int cpu;
  65. unsigned long cpu_perf;
  66. cinfo->hmpd = hmpd;
  67. cinfo->cpu = cpumask_any(&cinfo->hmpd->possible_cpus);
  68. for_each_cpu(cpu, &hmpd->possible_cpus) {
  69. cpu_perf = arch_scale_cpu_capacity(NULL, cpu);
  70. if (cpu_perf > 0)
  71. break;
  72. }
  73. cinfo->cpu_perf = cpu_perf;
  74. if (cpu_perf == 0)
  75. pr_info("Uninitialized CPU performance (CPU mask: %lx)",
  76. cpumask_bits(&hmpd->possible_cpus)[0]);
  77. }
  78. /*
  79. * Negative, if @a should sort before @b
  80. * Positive, if @a should sort after @b.
  81. * Return 0, if ordering is to be preserved
  82. */
  83. int hmp_compare(void *priv, struct list_head *a, struct list_head *b)
  84. {
  85. struct cluster_info ca;
  86. struct cluster_info cb;
  87. fillin_cluster(&ca, list_entry(a, struct hmp_domain, hmp_domains));
  88. fillin_cluster(&cb, list_entry(b, struct hmp_domain, hmp_domains));
  89. return (ca.cpu_perf > cb.cpu_perf) ? -1 : 1;
  90. }
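/*
 * Note: list_sort() treats a negative return value as "@a sorts before @b",
 * so this comparator orders hmp_domains by descending cpu_perf and the
 * fastest domain ends up at the head of the list, matching the assumption
 * documented at the top of this file.
 */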
  91. void init_hmp_domains(void)
  92. {
  93. struct hmp_domain *domain;
  94. struct cpumask cpu_mask;
  95. int id, maxid;
  96. cpumask_clear(&cpu_mask);
  97. maxid = arch_get_nr_clusters();
  98. /*
  99. * Initialize hmp_domains
  100. * Must be ordered with respect to compute capacity.
  101. * Fastest domain at head of list.
  102. */
  103. for (id = 0; id < maxid; id++) {
  104. arch_get_cluster_cpus(&cpu_mask, id);
  105. domain = (struct hmp_domain *)
  106. kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
  107. if (domain) {
  108. cpumask_copy(&domain->possible_cpus, &cpu_mask);
  109. cpumask_and(&domain->cpus, cpu_online_mask,
  110. &domain->possible_cpus);
  111. list_add(&domain->hmp_domains, &hmp_domains);
  112. }
  113. }
  114. /*
  115. * Sorting HMP domain by CPU capacity
  116. */
  117. list_sort(NULL, &hmp_domains, &hmp_compare);
  118. pr_info("Sort hmp_domains from little to big:\n");
  119. for_each_hmp_domain_L_first(domain) {
  120. pr_info(" cpumask: 0x%02lx\n",
  121. *cpumask_bits(&domain->possible_cpus));
  122. }
  123. hmp_cpu_mask_setup();
  124. }
  125. #ifdef CONFIG_SCHED_HMP
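/*
 * is_heavy_task() below treats a task as heavy once p->se.avg.loadwop_avg
 * reaches 650.  Assuming loadwop_avg ("load without priority") is tracked
 * on the SCHED_CAPACITY_SCALE (0..1024) range, as the 650/800-style
 * thresholds in this file suggest, 650 corresponds to roughly 63% of a
 * fully busy CPU.
 */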
  126. static int is_heavy_task(struct task_struct *p)
  127. {
  128. return p->se.avg.loadwop_avg >= 650 ? 1 : 0;
  129. }
  130. struct clb_env {
  131. struct clb_stats bstats;
  132. struct clb_stats lstats;
  133. int btarget, ltarget;
  134. struct cpumask bcpus;
  135. struct cpumask lcpus;
  136. unsigned int flags;
  137. struct mcheck {
  138. /* Details of this migration check */
  139. int status;
  140. /* Indicate whether we should perform this task migration */
  141. int result;
  142. } mcheck;
  143. };
  144. static void collect_cluster_stats(struct clb_stats *clbs,
  145. struct cpumask *cluster_cpus, int target)
  146. {
  147. /* Update cluster information */
  148. int cpu;
  149. int loadwop;
  150. for_each_cpu(cpu, cluster_cpus) {
  151. if (cpu_online(cpu)) {
  152. clbs->ncpu++;
  153. clbs->ntask += cpu_rq(cpu)->cfs.h_nr_running;
  154. clbs->load_avg += cpu_rq(cpu)->cfs.avg.loadwop_avg;
  155. #ifdef CONFIG_SCHED_HMP_PRIO_FILTER
  156. clbs->nr_normal_prio_task += cfs_nr_normal_prio(cpu);
  157. clbs->nr_dequeuing_low_prio +=
  158. cfs_nr_dequeuing_low_prio(cpu);
  159. #endif
  160. }
  161. }
  162. if (!clbs->ncpu || target >= num_possible_cpus() ||
  163. !cpumask_test_cpu(target, cluster_cpus))
  164. return;
  165. /*
  166. * Calculate available CPU capacity
  167. * Calculate available task space
  168. *
  169. * Why is the load ratio multiplied by the number of tasks?
  170. * The task is the unit of scheduling, so the scheduler must consider
  171. * it; considering task load alone is not enough.
  172. * Thus, multiplying by the number of tasks adjusts the load ratio to a
  173. * more reasonable value.
  174. */
  175. loadwop = cpu_rq(target)->cfs.avg.loadwop_avg;
  176. clbs->load_avg /= clbs->ncpu;
  177. clbs->acap = (clbs->cpu_capacity > loadwop) ?
  178. (clbs->cpu_capacity - loadwop) : 0;
  179. clbs->scaled_atask = (clbs->cpu_capacity > loadwop) ?
  180. (clbs->cpu_capacity - loadwop) : 0;
  181. trace_sched_cluster_stats(target,
  182. cpu_rq(target)->cfs.avg.loadwop_avg,
  183. cpu_rq(target)->cfs.h_nr_running,
  184. *cpumask_bits(cluster_cpus),
  185. clbs->ntask, clbs->load_avg,
  186. clbs->cpu_capacity, clbs->acap,
  187. clbs->scaled_atask, clbs->threshold);
  188. }
  189. /*
  190. * Task Dynamic Migration Threshold Adjustment.
  191. *
  192. * If the workload between clusters is not balanced, adjust migration
  193. * threshold in an attempt to move task precisely.
  194. *
  195. * Diff. = Max Threshold - Min Threshold
  196. *
  197. * Dynamic UP-Threshold =
  198. *                                B_nacap                B_natask
  199. *   Max Threshold - Diff. x -----------------  x  -------------------
  200. *                           B_nacap + L_nacap     B_natask + L_natask
  201. *
  202. *
  203. * Dynamic Down-Threshold =
  204. *                                L_nacap                L_natask
  205. *   Min Threshold + Diff. x -----------------  x  -------------------
  206. *                           B_nacap + L_nacap     B_natask + L_natask
  207. */
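/*
 * Illustrative example (added, not part of the original comment): with
 * Min Threshold = 0 (so Diff. = Max Threshold) and a balanced system where
 * B_nacap == L_nacap and B_natask == L_natask, each fraction is 1/2, so the
 * up-threshold becomes 3/4 of the max threshold and the down-threshold
 * becomes 1/4 of it.
 */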
  208. static void adj_threshold(struct clb_env *clbenv)
  209. {
  210. #define HMP_RESOLUTION_SCALING (4)
  211. #define hmp_scale_down(w) ((w) >> HMP_RESOLUTION_SCALING)
  212. unsigned long b_cap = 0, l_cap = 0;
  213. int b_nacap, l_nacap;
  214. const int hmp_max_weight = scale_load_down(HMP_MAX_LOAD);
  215. b_cap = clbenv->bstats.cpu_power;
  216. l_cap = clbenv->lstats.cpu_power;
  217. b_nacap = clbenv->bstats.acap;
  218. l_nacap = clbenv->lstats.acap * l_cap / (b_cap+1);
  219. b_nacap = hmp_scale_down(b_nacap);
  220. l_nacap = hmp_scale_down(l_nacap);
  221. if ((b_nacap + l_nacap) == 0) {
  222. clbenv->bstats.threshold = hmp_max_weight;
  223. clbenv->lstats.threshold = 0;
  224. } else {
  225. clbenv->bstats.threshold = hmp_max_weight -
  226. (hmp_max_weight * b_nacap * b_nacap) /
  227. ((b_nacap + l_nacap) * (b_nacap + l_nacap));
  228. clbenv->lstats.threshold = hmp_max_weight * l_nacap * l_nacap /
  229. ((b_nacap + l_nacap) * (b_nacap + l_nacap));
  230. }
  231. trace_sched_adj_threshold(clbenv->bstats.threshold,
  232. clbenv->lstats.threshold, clbenv->ltarget,
  233. l_cap, clbenv->btarget, b_cap);
  234. }
  235. static void sched_update_clbstats(struct clb_env *clbenv)
  236. {
  237. /* init cpu power and capacity */
  238. clbenv->bstats.cpu_power =
  239. (int) arch_scale_cpu_capacity(NULL, clbenv->btarget);
  240. clbenv->lstats.cpu_power =
  241. (int) arch_scale_cpu_capacity(NULL, clbenv->ltarget);
  242. clbenv->lstats.cpu_capacity = SCHED_CAPACITY_SCALE *
  243. clbenv->lstats.cpu_power / (clbenv->bstats.cpu_power+1);
  244. clbenv->bstats.cpu_capacity = SCHED_CAPACITY_SCALE;
  245. collect_cluster_stats(&clbenv->bstats, &clbenv->bcpus, clbenv->btarget);
  246. collect_cluster_stats(&clbenv->lstats, &clbenv->lcpus, clbenv->ltarget);
  247. adj_threshold(clbenv);
  248. }
  249. static struct hmp_domain *hmp_get_hmp_domain_for_cpu(int cpu)
  250. {
  251. struct hmp_domain *domain;
  252. struct list_head *pos;
  253. list_for_each(pos, &hmp_domains) {
  254. domain = list_entry(pos, struct hmp_domain, hmp_domains);
  255. if (cpumask_test_cpu(cpu, &domain->possible_cpus))
  256. return domain;
  257. }
  258. return NULL;
  259. }
  260. static void hmp_online_cpu(int cpu)
  261. {
  262. struct hmp_domain *domain = hmp_get_hmp_domain_for_cpu(cpu);
  263. if (domain)
  264. cpumask_set_cpu(cpu, &domain->cpus);
  265. }
  266. static void hmp_offline_cpu(int cpu)
  267. {
  268. struct hmp_domain *domain = hmp_get_hmp_domain_for_cpu(cpu);
  269. if (domain)
  270. cpumask_clear_cpu(cpu, &domain->cpus);
  271. }
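/*
 * hmp_next_up/down_threshold gate how soon another migration in the same
 * direction may happen.  hmp_up_stable()/hmp_down_stable() below compare
 * the sched_clock() delta shifted right by 10 (roughly microseconds)
 * against these values, so 4096 corresponds to about 4 ms.
 */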
  272. unsigned int hmp_next_up_threshold = 4096;
  273. unsigned int hmp_next_down_threshold = 4096;
  274. #define hmp_last_up_migration(cpu) \
  275. cpu_rq(cpu)->cfs.avg.hmp_last_up_migration
  276. #define hmp_last_down_migration(cpu) \
  277. cpu_rq(cpu)->cfs.avg.hmp_last_down_migration
  278. static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
  279. int *min_cpu);
  280. /* Check if cpu is in fastest hmp_domain */
  281. inline unsigned int hmp_cpu_is_fastest(int cpu)
  282. {
  283. struct list_head *pos;
  284. pos = &hmp_cpu_domain(cpu)->hmp_domains;
  285. return pos == hmp_domains.next;
  286. }
  287. /* Check if cpu is in slowest hmp_domain */
  288. inline unsigned int hmp_cpu_is_slowest(int cpu)
  289. {
  290. struct list_head *pos;
  291. pos = &hmp_cpu_domain(cpu)->hmp_domains;
  292. return list_is_last(pos, &hmp_domains);
  293. }
  294. /* Next (slower) hmp_domain relative to cpu */
  295. static inline struct hmp_domain *hmp_slower_domain(int cpu)
  296. {
  297. struct list_head *pos;
  298. pos = &hmp_cpu_domain(cpu)->hmp_domains;
  299. if (list_is_last(pos, &hmp_domains))
  300. return list_entry(pos, struct hmp_domain, hmp_domains);
  301. return list_entry(pos->next, struct hmp_domain, hmp_domains);
  302. }
  303. /* Previous (faster) hmp_domain relative to cpu */
  304. static inline struct hmp_domain *hmp_faster_domain(int cpu)
  305. {
  306. struct list_head *pos;
  307. pos = &hmp_cpu_domain(cpu)->hmp_domains;
  308. if (pos->prev == &hmp_domains)
  309. return list_entry(pos, struct hmp_domain, hmp_domains);
  310. return list_entry(pos->prev, struct hmp_domain, hmp_domains);
  311. }
  312. /*
  313. * Selects a cpu in previous (faster) hmp_domain
  314. * Note that cpumask_any_and() returns the first cpu in the cpumask
  315. */
  316. static inline unsigned int hmp_select_faster_cpu(struct task_struct *tsk,
  317. int cpu)
  318. {
  319. int lowest_cpu = num_possible_cpus();
  320. __always_unused int lowest_ratio =
  321. hmp_domain_min_load(hmp_faster_domain(cpu), &lowest_cpu);
  322. /*
  323. * If the lowest-loaded CPU in the domain is allowed by
  324. * the task affinity, select it;
  325. * otherwise select any CPU in the domain that is allowed.
  326. */
  327. if (lowest_cpu < nr_cpu_ids &&
  328. cpumask_test_cpu(lowest_cpu, &tsk->cpus_allowed))
  329. return lowest_cpu;
  330. else
  331. return cpumask_any_and(&hmp_faster_domain(cpu)->cpus,
  332. &tsk->cpus_allowed);
  333. }
  334. static inline void hmp_next_up_delay(struct sched_entity *se, int cpu)
  335. {
  336. hmp_last_up_migration(cpu) = sched_clock();
  337. hmp_last_down_migration(cpu) = 0;
  338. }
  339. static inline void hmp_next_down_delay(struct sched_entity *se, int cpu)
  340. {
  341. hmp_last_down_migration(cpu) = sched_clock();
  342. hmp_last_up_migration(cpu) = 0;
  343. }
  344. static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
  345. int *min_cpu)
  346. {
  347. int cpu;
  348. int min_cpu_runnable_temp = num_possible_cpus();
  349. unsigned long min_runnable_load = INT_MAX;
  350. unsigned long contrib;
  351. for_each_cpu(cpu, &hmpd->cpus) {
  352. struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
  353. /* don't use the divisor in the loop, just at the end */
  354. contrib = cfs_rq->runnable_load_avg * scale_load_down(1024);
  355. if (contrib < min_runnable_load) {
  356. min_runnable_load = contrib;
  357. min_cpu_runnable_temp = cpu;
  358. }
  359. }
  360. if (min_cpu)
  361. *min_cpu = min_cpu_runnable_temp;
  362. /* domain will often have at least one empty CPU */
  363. return min_runnable_load ? min_runnable_load / (__LOAD_AVG_MAX + 1) : 0;
  364. }
  365. /* Function Declaration */
  366. static int hmp_up_stable(int cpu);
  367. static int hmp_down_stable(int cpu);
  368. static unsigned int hmp_up_migration(int cpu,
  369. int *target_cpu, struct sched_entity *se,
  370. struct clb_env *clbenv);
  371. static unsigned int hmp_down_migration(int cpu,
  372. int *target_cpu, struct sched_entity *se,
  373. struct clb_env *clbenv);
  374. #ifdef CONFIG_SCHED_HMP_PLUS
  375. static struct sched_entity *hmp_get_heaviest_task(
  376. struct sched_entity *se, int target_cpu);
  377. static struct sched_entity *hmp_get_lightest_task(
  378. struct sched_entity *se, int migrate_down);
  379. #endif
  380. #define hmp_caller_is_gb(caller) ((caller == HMP_GB)?1:0)
  381. #define hmp_cpu_stable(cpu, up) (up ? \
  382. hmp_up_stable(cpu) : hmp_down_stable(cpu))
  383. #define hmp_inc(v) ((v) + 1)
  384. #define task_created(f) ((SD_BALANCE_EXEC == f || SD_BALANCE_FORK == f)?1:0)
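/*
 * task_created(): true when select_task_rq() was called for a newly forked
 * or exec'ed task (SD_BALANCE_FORK / SD_BALANCE_EXEC) rather than for a
 * normal wakeup.
 */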
  385. /*
  386. * Heterogeneous Multi-Processor (HMP) - Utility Function
  387. */
  388. /*
  389. * These functions add next up/down migration delay that prevents the task from
  390. * doing another migration in the same direction until the delay has expired.
  391. */
  392. static int hmp_up_stable(int cpu)
  393. {
  394. u64 now = sched_clock();
  395. if (((now - hmp_last_up_migration(cpu)) >> 10) < hmp_next_up_threshold)
  396. return 0;
  397. return 1;
  398. }
  399. static int hmp_down_stable(int cpu)
  400. {
  401. u64 now = sched_clock();
  402. u64 duration = now - hmp_last_down_migration(cpu);
  403. if ((duration >> 10) < hmp_next_down_threshold)
  404. return 0;
  405. return 1;
  406. }
  407. /* Select the most appropriate CPU from hmp cluster */
  408. static unsigned int hmp_select_cpu(unsigned int caller, struct task_struct *p,
  409. struct cpumask *mask, int prev, int up)
  410. {
  411. int curr = 0;
  412. int target = num_possible_cpus();
  413. unsigned long curr_wload = 0;
  414. unsigned long target_wload = 0;
  415. struct cpumask srcp;
  416. struct cpumask *tsk_cpus_allow = &p->cpus_allowed;
  417. cpumask_andnot(&srcp, cpu_online_mask, cpu_isolated_mask);
  418. cpumask_and(&srcp, &srcp, mask);
  419. target = cpumask_any_and(&srcp, tsk_cpus_allow);
  420. if (target >= num_possible_cpus())
  421. goto out;
  422. /*
  423. * RT class is taken into account because CPU load is multiplied
  424. * by the total number of CPU runnable tasks that includes RT tasks.
  425. */
  426. target_wload = hmp_inc(cfs_load(target));
  427. target_wload *= rq_length(target);
  428. for_each_cpu(curr, mask) {
  429. /* Check CPU status and task affinity */
  430. if (!cpu_online(curr) ||
  431. !cpumask_test_cpu(curr, tsk_cpus_allow) ||
  432. cpu_isolated(curr))
  433. continue;
  434. /* For global load balancing, unstable CPU will be bypassed */
  435. if (hmp_caller_is_gb(caller) && !hmp_cpu_stable(curr, up))
  436. continue;
  437. curr_wload = hmp_inc(cfs_load(curr));
  438. curr_wload *= rq_length(curr);
  439. if (curr_wload < target_wload) {
  440. target_wload = curr_wload;
  441. target = curr;
  442. } else if (curr_wload == target_wload && curr == prev) {
  443. target = curr;
  444. }
  445. }
  446. out:
  447. return target;
  448. }
  449. static int hmp_select_task_migration(int sd_flag,
  450. struct task_struct *p, int prev_cpu, int new_cpu,
  451. struct cpumask *fast_cpu_mask, struct cpumask *slow_cpu_mask)
  452. {
  453. int step = 0;
  454. struct sched_entity *se = &p->se;
  455. int B_target = num_possible_cpus();
  456. int L_target = num_possible_cpus();
  457. struct clb_env clbenv;
  458. B_target = hmp_select_cpu(HMP_SELECT_RQ, p, fast_cpu_mask, prev_cpu, 0);
  459. L_target = hmp_select_cpu(HMP_SELECT_RQ, p, slow_cpu_mask, prev_cpu, 1);
  460. /*
  461. * Only one cluster exists or only one cluster is allowed for this task
  462. * Case 1: return the runqueue whose load is minimum
  463. * Case 2: return original CFS runqueue selection result
  464. */
  465. if (B_target >= num_possible_cpus() && L_target >= num_possible_cpus())
  466. goto out;
  467. if (B_target >= num_possible_cpus())
  468. goto select_slow;
  469. if (L_target >= num_possible_cpus())
  470. goto select_fast;
  471. /*
  472. * Two clusters exist and both clusters are allowed for this task
  473. * Step 1: Move newly created task to the cpu where no tasks are running
  474. * Step 2: Migrate heavy-load task to big
  475. * Step 3: Migrate light-load task to LITTLE
  476. * Step 4: Make sure the task stays in its previous hmp domain
  477. */
  478. step = 1;
  479. if (task_created(sd_flag) && !task_low_priority(p->prio)) {
  480. if (!rq_length(B_target))
  481. goto select_fast;
  482. if (!rq_length(L_target))
  483. goto select_slow;
  484. }
  485. memset(&clbenv, 0, sizeof(clbenv));
  486. clbenv.flags |= HMP_SELECT_RQ;
  487. cpumask_copy(&clbenv.lcpus, slow_cpu_mask);
  488. cpumask_copy(&clbenv.bcpus, fast_cpu_mask);
  489. clbenv.ltarget = L_target;
  490. clbenv.btarget = B_target;
  491. step = 2;
  492. sched_update_clbstats(&clbenv);
  493. if (hmp_up_migration(L_target, &B_target, se, &clbenv))
  494. goto select_fast;
  495. step = 3;
  496. if (hmp_down_migration(B_target, &L_target, se, &clbenv))
  497. goto select_slow;
  498. step = 4;
  499. if (hmp_cpu_is_slowest(prev_cpu))
  500. goto select_slow;
  501. goto select_fast;
  502. select_fast:
  503. new_cpu = B_target;
  504. cpumask_clear(slow_cpu_mask);
  505. goto out;
  506. select_slow:
  507. new_cpu = L_target;
  508. cpumask_copy(fast_cpu_mask, slow_cpu_mask);
  509. cpumask_clear(slow_cpu_mask);
  510. goto out;
  511. out:
  512. /*
  513. * The clbenv.*stats.load_avg values are only ready after step 2.
  514. * Dump them only after that step to avoid invalid stack values.
  515. */
  516. if (step > 1)
  517. trace_sched_hmp_load(step,
  518. clbenv.bstats.load_avg, clbenv.lstats.load_avg);
  519. return new_cpu;
  520. }
  521. /*
  522. * Heterogeneous Multi-Processor (HMP) - Task Runqueue Selection
  523. */
  524. /* This function enhances the original task selection function */
  525. static int hmp_select_task_rq_fair(int sd_flag, struct task_struct *p,
  526. int prev_cpu, int new_cpu)
  527. {
  528. struct list_head *pos;
  529. struct cpumask fast_cpu_mask, slow_cpu_mask;
  530. if (idle_cpu(new_cpu) && hmp_cpu_is_fastest(new_cpu))
  531. return new_cpu;
  532. /* error handling */
  533. if (prev_cpu >= num_possible_cpus())
  534. return new_cpu;
  535. /*
  536. * Skip all the checks if only one CPU is online.
  537. * Otherwise, select the most appropriate CPU from cluster.
  538. */
  539. if (num_online_cpus() == 1)
  540. goto out;
  541. cpumask_clear(&fast_cpu_mask);
  542. cpumask_clear(&slow_cpu_mask);
  543. /* order: fast to slow hmp domain */
  544. list_for_each(pos, &hmp_domains) {
  545. struct hmp_domain *domain;
  546. domain = list_entry(pos, struct hmp_domain, hmp_domains);
  547. if (cpumask_empty(&domain->cpus))
  548. continue;
  549. if (cpumask_empty(&fast_cpu_mask)) {
  550. cpumask_copy(&fast_cpu_mask, &domain->possible_cpus);
  551. } else {
  552. cpumask_copy(&slow_cpu_mask, &domain->possible_cpus);
  553. new_cpu = hmp_select_task_migration(sd_flag, p,
  554. prev_cpu, new_cpu, &fast_cpu_mask,
  555. &slow_cpu_mask);
  556. }
  557. }
  558. out:
  559. /* This can happen when num_online_cpus() == 1 */
  560. if (new_cpu >= nr_cpu_ids) {
  561. /* BUG_ON(1); */
  562. new_cpu = prev_cpu;
  563. }
  564. return new_cpu;
  565. }
  566. #define hmp_fast_cpu_has_spare_cycles(B, cpu_load) \
  567. (cpu_load * capacity_margin < (SCHED_CAPACITY_SCALE * 1024))
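/*
 * A big CPU is considered to have spare cycles while
 * cpu_load * capacity_margin < SCHED_CAPACITY_SCALE * 1024.  capacity_margin
 * is defined elsewhere; assuming the 1280 (~80%) value commonly used by
 * kernels of this era, that means cpu_load below roughly 819.
 */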
  568. #define hmp_task_fast_cpu_afford(B, se, cpu) \
  569. (B->acap > 0 && hmp_fast_cpu_has_spare_cycles(B, \
  570. se_load(se) + cfs_load(cpu)))
  571. #define hmp_fast_cpu_oversubscribed(caller, B, se, cpu) \
  572. (hmp_caller_is_gb(caller) ? \
  573. !hmp_fast_cpu_has_spare_cycles(B, cfs_load(cpu)) : \
  574. !hmp_task_fast_cpu_afford(B, se, cpu))
  575. #define hmp_task_slow_cpu_afford(L, se) \
  576. (L->acap > 0 && L->acap >= se_load(se))
  577. /* Macro used by low-priority task filter */
  578. #define hmp_low_prio_task_up_rejected(p, B, L) \
  579. (task_low_priority(p->prio) && \
  580. (B->ntask >= B->ncpu || 0 != L->nr_normal_prio_task) && \
  581. (p->se.avg.loadwop_avg < 800))
  582. #define hmp_low_prio_task_down_allowed(p, B, L) \
  583. (task_low_priority(p->prio) && !B->nr_dequeuing_low_prio && \
  584. B->ntask >= B->ncpu && 0 != L->nr_normal_prio_task && \
  585. (p->se.avg.loadwop_avg < 800))
  586. /* Migration check result */
  587. #define HMP_BIG_NOT_OVERSUBSCRIBED (0x01)
  588. #define HMP_BIG_CAPACITY_INSUFFICIENT (0x02)
  589. #define HMP_LITTLE_CAPACITY_INSUFFICIENT (0x04)
  590. #define HMP_LOW_PRIORITY_FILTER (0x08)
  591. #define HMP_BIG_BUSY_LITTLE_IDLE (0x10)
  592. #define HMP_BIG_IDLE (0x20)
  593. #define HMP_MIGRATION_APPROVED (0x100)
  594. #define HMP_TASK_UP_MIGRATION (0x200)
  595. #define HMP_TASK_DOWN_MIGRATION (0x400)
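/*
 * The check bits above are OR'ed into clb_env.mcheck.status by
 * hmp_up_migration() and hmp_down_migration() together with the caller
 * flags, and are exported through the sched_dynamic_threshold tracepoint
 * so traces can show why a migration was approved or rejected.
 */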
  596. /* Migration statistics */
  597. struct hmp_statisic hmp_stats;
  598. /*
  599. * Check whether this task should be migrated to big
  600. * The flow is briefly summarized below:
  601. * 1) Migration stabilizing
  602. * 2) Filter low-priority task
  603. * 2.5) Keep all CPUs busy
  604. * 3) Check CPU capacity
  605. * 4) Check dynamic migration threshold
  606. */
  607. static unsigned int hmp_up_migration(int cpu,
  608. int *target_cpu, struct sched_entity *se,
  609. struct clb_env *clbenv)
  610. {
  611. struct task_struct *p = task_of(se);
  612. struct clb_stats *L, *B;
  613. struct mcheck *check;
  614. int curr_cpu = cpu;
  615. unsigned int caller = clbenv->flags;
  616. cpumask_t act_mask;
  617. L = &clbenv->lstats;
  618. B = &clbenv->bstats;
  619. check = &clbenv->mcheck;
  620. check->status = clbenv->flags;
  621. check->status |= HMP_TASK_UP_MIGRATION;
  622. check->result = 0;
  623. cpumask_andnot(&act_mask, cpu_active_mask, cpu_isolated_mask);
  624. /*
  625. * No migration is needed if
  626. * 1) There is only one cluster
  627. * 2) Task is already in big cluster
  628. * 3) It violates task affinity
  629. */
  630. if (!L->ncpu || !B->ncpu
  631. || cpumask_test_cpu(curr_cpu, &clbenv->bcpus)
  632. || !cpumask_intersects(&clbenv->bcpus,
  633. &p->cpus_allowed)
  634. || !cpumask_intersects(&clbenv->bcpus, &act_mask))
  635. goto out;
  636. /*
  637. * [1] Migration stabilizing
  638. * Let the task load settle before doing another up migration.
  639. * This prevents a bunch of tasks from migrating to an unstable CPU.
  640. */
  641. if (!hmp_up_stable(*target_cpu))
  642. goto out;
  643. /* [2] Filter low-priority task */
  644. #ifdef CONFIG_SCHED_HMP_PRIO_FILTER
  645. if (hmp_low_prio_task_up_rejected(p, B, L)) {
  646. check->status |= HMP_LOW_PRIORITY_FILTER;
  647. goto trace;
  648. }
  649. #endif
  650. /* [2.5] If big is idle, just go to big */
  651. if (rq_length(*target_cpu) == 0) {
  652. check->status |= HMP_BIG_IDLE;
  653. check->status |= HMP_MIGRATION_APPROVED;
  654. check->result = 1;
  655. goto trace;
  656. }
  657. /*
  658. * [3] Check CPU capacity
  659. * Forbid up-migration if big CPU can't handle this task
  660. */
  661. if (!hmp_task_fast_cpu_afford(B, se, *target_cpu)) {
  662. check->status |= HMP_BIG_CAPACITY_INSUFFICIENT;
  663. goto trace;
  664. }
  665. /*
  666. * [4] Check dynamic migration threshold
  667. * Migrate task from LITTLE to big if load is greater than up-threshold
  668. */
  669. if (se_load(se) >= B->threshold) {
  670. check->status |= HMP_MIGRATION_APPROVED;
  671. check->result = 1;
  672. }
  673. trace:
  674. if (check->result && hmp_caller_is_gb(caller))
  675. hmp_stats.nr_force_up++;
  676. trace_sched_hmp_stats(&hmp_stats);
  677. trace_sched_dynamic_threshold(task_of(se), B->threshold, check->status,
  678. curr_cpu, *target_cpu, se_load(se), B, L);
  679. trace_sched_dynamic_threshold_draw(B->threshold, L->threshold);
  680. out:
  681. return check->result;
  682. }
  683. /*
  684. * Check whether this task should be migrated to LITTLE
  685. * The flow is briefly summarized below:
  686. * 1) Migration stabilizing
  687. * 1.5) Keep all CPUs busy
  688. * 2) Filter low-priority task
  689. * 3) Check CPU capacity
  690. * 4) Check dynamic migration threshold
  691. */
  692. static unsigned int hmp_down_migration(int cpu,
  693. int *target_cpu, struct sched_entity *se,
  694. struct clb_env *clbenv)
  695. {
  696. struct task_struct *p = task_of(se);
  697. struct clb_stats *L, *B;
  698. struct mcheck *check;
  699. int curr_cpu = cpu;
  700. unsigned int caller = clbenv->flags;
  701. cpumask_t act_mask;
  702. L = &clbenv->lstats;
  703. B = &clbenv->bstats;
  704. check = &clbenv->mcheck;
  705. check->status = caller;
  706. check->status |= HMP_TASK_DOWN_MIGRATION;
  707. check->result = 0;
  708. cpumask_andnot(&act_mask, cpu_active_mask, cpu_isolated_mask);
  709. /*
  710. * No migration is needed if
  711. * 1) There is only one cluster
  712. * 2) Task is already in LITTLE cluster
  713. * 3) It violates task affinity
  714. */
  715. if (!L->ncpu || !B->ncpu
  716. || cpumask_test_cpu(curr_cpu, &clbenv->lcpus)
  717. || !cpumask_intersects(&clbenv->lcpus,
  718. &p->cpus_allowed)
  719. || !cpumask_intersects(&clbenv->lcpus, &act_mask))
  720. goto out;
  721. /*
  722. * [1] Migration stabilizing
  723. * Let the task load settle before doing another down migration.
  724. * This prevents a bunch of tasks from migrating to an unstable CPU.
  725. */
  726. if (!hmp_down_stable(*target_cpu))
  727. goto out;
  728. /* [1.5] If big is busy and LITTLE is idle, just go to LITTLE */
  729. if (rq_length(*target_cpu) == 0 && caller == HMP_SELECT_RQ
  730. && rq_length(curr_cpu) > 0) {
  731. struct rq *curr_rq = cpu_rq(curr_cpu);
  732. /*
  733. * If the task currently running on the big core is not heavy
  734. * while the waking task is heavy,
  735. *
  736. * don't go to LITTLE.
  737. */
  738. if (!(!is_heavy_task(curr_rq->curr) && is_heavy_task(p))) {
  739. check->status |= HMP_BIG_BUSY_LITTLE_IDLE;
  740. check->status |= HMP_MIGRATION_APPROVED;
  741. check->result = 1;
  742. goto trace;
  743. }
  744. }
  745. /* [2] Filter low-priority task */
  746. #ifdef CONFIG_SCHED_HMP_PRIO_FILTER
  747. if (hmp_low_prio_task_down_allowed(p, B, L)) {
  748. cfs_nr_dequeuing_low_prio(curr_cpu)++;
  749. check->status |= HMP_LOW_PRIORITY_FILTER;
  750. check->status |= HMP_MIGRATION_APPROVED;
  751. check->result = 1;
  752. goto trace;
  753. }
  754. #endif
  755. /*
  756. * [3] Check CPU capacity
  757. * Forbid down-migration if either of the following conditions is true
  758. * 1) big cpu is not oversubscribed (if big CPU seems to have spare
  759. * cycles, do not force this task to run on LITTLE CPU, but
  760. * keep it in its previous cluster instead)
  761. * 2) LITTLE cpu doesn't have available capacity for this new task
  762. */
  763. if (cpu_rq(curr_cpu)->cfs.h_nr_running > 1 &&
  764. !hmp_fast_cpu_oversubscribed(caller, B, se, curr_cpu)) {
  765. check->status |= HMP_BIG_NOT_OVERSUBSCRIBED;
  766. goto trace;
  767. }
  768. /*
  769. * [4] Check dynamic migration threshold
  770. * Migrate task from big to LITTLE if load ratio is less than
  771. * or equal to down-threshold
  772. */
  773. if (L->threshold >= se_load(se)) {
  774. check->status |= HMP_MIGRATION_APPROVED;
  775. check->result = 1;
  776. }
  777. trace:
  778. if (check->result && hmp_caller_is_gb(caller))
  779. hmp_stats.nr_force_down++;
  780. trace_sched_hmp_stats(&hmp_stats);
  781. trace_sched_dynamic_threshold(task_of(se), L->threshold, check->status,
  782. curr_cpu, *target_cpu, se_load(se), B, L);
  783. trace_sched_dynamic_threshold_draw(B->threshold, L->threshold);
  784. out:
  785. return check->result;
  786. }
  787. static int hmp_active_load_balance_cpu_stop(void *data)
  788. {
  789. int ret;
  790. struct task_struct *p = ((struct rq *)data)->migrate_task;
  791. ret = active_load_balance_cpu_stop(data);
  792. put_task_struct(p);
  793. return ret;
  794. }
  795. /*
  796. * According to Linaro's comment, we should only check the currently running
  797. * tasks because selecting other tasks for migration will require extensive
  798. * bookkeeping.
  799. */
  800. static void hmp_force_down_migration(int this_cpu)
  801. {
  802. int target_cpu;
  803. struct sched_entity *se;
  804. struct rq *target;
  805. unsigned long flags;
  806. unsigned int force = 0;
  807. struct task_struct *p;
  808. struct clb_env clbenv;
  809. #ifdef CONFIG_SCHED_HMP_PLUS
  810. struct sched_entity *orig;
  811. int B_cpu;
  812. #endif
  813. struct hmp_domain *hmp_domain = NULL;
  814. struct cpumask fast_cpu_mask, slow_cpu_mask;
  815. cpumask_clear(&fast_cpu_mask);
  816. cpumask_clear(&slow_cpu_mask);
  817. /* Migrate light task from big to LITTLE */
  818. if (!hmp_cpu_is_slowest(this_cpu)) {
  819. hmp_domain = hmp_cpu_domain(this_cpu);
  820. cpumask_copy(&fast_cpu_mask, &hmp_domain->possible_cpus);
  821. while (!list_is_last(&hmp_domain->hmp_domains, &hmp_domains)) {
  822. struct list_head *pos = &hmp_domain->hmp_domains;
  823. hmp_domain = list_entry(pos->next,
  824. struct hmp_domain, hmp_domains);
  825. if (!cpumask_empty(&hmp_domain->cpus)) {
  826. cpumask_copy(&slow_cpu_mask,
  827. &hmp_domain->possible_cpus);
  828. break;
  829. }
  830. }
  831. }
  832. if (!hmp_domain || hmp_domain == hmp_cpu_domain(this_cpu))
  833. return;
  834. if (cpumask_empty(&fast_cpu_mask) || cpumask_empty(&slow_cpu_mask))
  835. return;
  836. force = 0;
  837. target = cpu_rq(this_cpu);
  838. raw_spin_lock_irqsave(&target->lock, flags);
  839. se = target->cfs.curr;
  840. if (!se) {
  841. raw_spin_unlock_irqrestore(&target->lock, flags);
  842. return;
  843. }
  844. /* Find task entity */
  845. if (!entity_is_task(se)) {
  846. struct cfs_rq *cfs_rq;
  847. cfs_rq = group_cfs_rq(se);
  848. while (cfs_rq) {
  849. se = cfs_rq->curr;
  850. cfs_rq = group_cfs_rq(se);
  851. }
  852. }
  853. #ifdef CONFIG_SCHED_HMP_PLUS
  854. orig = se;
  855. se = hmp_get_lightest_task(orig, 1);
  856. if (!se) {
  857. raw_spin_unlock_irqrestore(&target->lock, flags);
  858. return;
  859. }
  860. if (!entity_is_task(se))
  861. p = task_of(orig);
  862. else
  863. #endif
  864. p = task_of(se);
  865. #ifdef CONFIG_SCHED_HMP_PLUS
  866. /*
  867. * Don't offload to LITTLE if there is an idle big CPU;
  868. * let load balancing do its work.
  869. * This also prevents idle_balance from leading to a potential ping-pong.
  870. */
  871. B_cpu = hmp_select_cpu(HMP_GB, p, &fast_cpu_mask, this_cpu, 0);
  872. if (B_cpu < nr_cpu_ids && !rq_length(B_cpu)) {
  873. raw_spin_unlock_irqrestore(&target->lock, flags);
  874. return;
  875. }
  876. #endif
  877. target_cpu = hmp_select_cpu(HMP_GB, p, &slow_cpu_mask, -1, 1);
  878. if (target_cpu >= num_possible_cpus()) {
  879. raw_spin_unlock_irqrestore(&target->lock, flags);
  880. return;
  881. }
  882. /* Collect cluster information */
  883. memset(&clbenv, 0, sizeof(clbenv));
  884. clbenv.flags |= HMP_GB;
  885. clbenv.btarget = this_cpu;
  886. clbenv.ltarget = target_cpu;
  887. cpumask_copy(&clbenv.lcpus, &slow_cpu_mask);
  888. cpumask_copy(&clbenv.bcpus, &fast_cpu_mask);
  889. sched_update_clbstats(&clbenv);
  890. #ifdef CONFIG_SCHED_HMP_PLUS
  891. if (cpu_rq(this_cpu)->cfs.h_nr_running < 2) {
  892. raw_spin_unlock_irqrestore(&target->lock, flags);
  893. return;
  894. }
  895. #endif
  896. /* Check migration threshold */
  897. if (!target->active_balance &&
  898. hmp_down_migration(this_cpu,
  899. &target_cpu, se, &clbenv) &&
  900. !cpu_park(cpu_of(target))) {
  901. if (p->state != TASK_DEAD) {
  902. get_task_struct(p);
  903. target->active_balance = MIGR_DOWN_MIGRATE;
  904. target->push_cpu = target_cpu;
  905. target->migrate_task = p;
  906. force = 1;
  907. trace_sched_hmp_migrate(p, target->push_cpu,
  908. MIGR_DOWN_MIGRATE);
  909. hmp_next_down_delay(&p->se, target->push_cpu);
  910. }
  911. }
  912. raw_spin_unlock_irqrestore(&target->lock, flags);
  913. if (force) {
  914. if (!stop_one_cpu_nowait(cpu_of(target),
  915. hmp_active_load_balance_cpu_stop,
  916. target, &target->active_balance_work)) {
  917. put_task_struct(p); /* out of rq->lock */
  918. raw_spin_lock_irqsave(&target->lock, flags);
  919. target->active_balance = 0;
  920. target->migrate_task = NULL;
  921. force = 0;
  922. raw_spin_unlock_irqrestore(&target->lock, flags);
  923. }
  924. }
  925. }
  926. /*
  927. * hmp_force_up_migration checks runqueues for tasks that need to
  928. * be actively migrated to a faster cpu.
  929. */
  930. static void hmp_force_up_migration(int this_cpu)
  931. {
  932. int curr_cpu, target_cpu;
  933. struct sched_entity *se;
  934. struct rq *target;
  935. unsigned long flags;
  936. unsigned int force = 0;
  937. struct task_struct *p;
  938. struct clb_env clbenv;
  939. #ifdef CONFIG_SCHED_HMP_PLUS
  940. struct sched_entity *orig;
  941. #endif
  942. if (!spin_trylock(&hmp_force_migration))
  943. return;
  944. /* Migrate heavy task from LITTLE to big */
  945. for_each_online_cpu(curr_cpu) {
  946. struct hmp_domain *hmp_domain = NULL;
  947. struct cpumask fast_cpu_mask, slow_cpu_mask;
  948. cpumask_clear(&fast_cpu_mask);
  949. cpumask_clear(&slow_cpu_mask);
  950. if (!hmp_cpu_is_fastest(curr_cpu)) {
  951. /* current cpu is in slow_cpu_mask */
  952. hmp_domain = hmp_cpu_domain(curr_cpu);
  953. cpumask_copy(&slow_cpu_mask,
  954. &hmp_domain->possible_cpus);
  955. while (&hmp_domain->hmp_domains != hmp_domains.next) {
  956. struct list_head *pos;
  957. pos = &hmp_domain->hmp_domains;
  958. hmp_domain = list_entry(pos->prev,
  959. struct hmp_domain, hmp_domains);
  960. if (cpumask_empty(&hmp_domain->cpus))
  961. continue;
  962. cpumask_copy(&fast_cpu_mask,
  963. &hmp_domain->possible_cpus);
  964. break;
  965. }
  966. } else {
  967. hmp_force_down_migration(curr_cpu);
  968. continue;
  969. }
  970. if (!hmp_domain || hmp_domain == hmp_cpu_domain(curr_cpu))
  971. continue;
  972. if (cpumask_empty(&fast_cpu_mask) ||
  973. cpumask_empty(&slow_cpu_mask))
  974. continue;
  975. force = 0;
  976. target = cpu_rq(curr_cpu);
  977. raw_spin_lock_irqsave(&target->lock, flags);
  978. se = target->cfs.curr;
  979. if (!se) {
  980. raw_spin_unlock_irqrestore(&target->lock, flags);
  981. continue;
  982. }
  983. /* Find task entity */
  984. if (!entity_is_task(se)) {
  985. struct cfs_rq *cfs_rq;
  986. cfs_rq = group_cfs_rq(se);
  987. while (cfs_rq) {
  988. se = cfs_rq->curr;
  989. cfs_rq = group_cfs_rq(se);
  990. }
  991. }
  992. #ifdef CONFIG_SCHED_HMP_PLUS
  993. orig = se;
  994. se = hmp_get_heaviest_task(se, -1);
  995. if (!se) {
  996. raw_spin_unlock_irqrestore(&target->lock, flags);
  997. continue;
  998. }
  999. if (!entity_is_task(se))
  1000. p = task_of(orig);
  1001. else
  1002. #endif
  1003. p = task_of(se);
  1004. target_cpu = hmp_select_cpu(HMP_GB, p, &fast_cpu_mask, -1, 0);
  1005. if (target_cpu >= num_possible_cpus()) {
  1006. raw_spin_unlock_irqrestore(&target->lock, flags);
  1007. continue;
  1008. }
  1009. /* Collect cluster information */
  1010. memset(&clbenv, 0, sizeof(clbenv));
  1011. clbenv.flags |= HMP_GB;
  1012. clbenv.ltarget = curr_cpu;
  1013. clbenv.btarget = target_cpu;
  1014. cpumask_copy(&clbenv.lcpus, &slow_cpu_mask);
  1015. cpumask_copy(&clbenv.bcpus, &fast_cpu_mask);
  1016. sched_update_clbstats(&clbenv);
  1017. /* Check migration threshold */
  1018. if (!target->active_balance &&
  1019. hmp_up_migration(curr_cpu,
  1020. &target_cpu, se, &clbenv) &&
  1021. !cpu_park(cpu_of(target))) {
  1022. if (p->state != TASK_DEAD) {
  1023. get_task_struct(p);
  1024. target->active_balance = MIGR_UP_MIGRATE;
  1025. target->push_cpu = target_cpu;
  1026. target->migrate_task = p;
  1027. force = 1;
  1028. trace_sched_hmp_migrate(p, target->push_cpu,
  1029. MIGR_UP_MIGRATE);
  1030. hmp_next_up_delay(&p->se, target->push_cpu);
  1031. }
  1032. }
  1033. raw_spin_unlock_irqrestore(&target->lock, flags);
  1034. if (force) {
  1035. if (!stop_one_cpu_nowait(cpu_of(target),
  1036. hmp_active_load_balance_cpu_stop,
  1037. target, &target->active_balance_work)) {
  1038. put_task_struct(p); /* out of rq->lock */
  1039. raw_spin_lock_irqsave(&target->lock, flags);
  1040. target->active_balance = 0;
  1041. target->migrate_task = NULL;
  1042. force = 0;
  1043. raw_spin_unlock_irqrestore(
  1044. &target->lock, flags);
  1045. }
  1046. } else
  1047. hmp_force_down_migration(this_cpu);
  1048. }
  1049. trace_sched_hmp_load(100,
  1050. clbenv.bstats.load_avg, clbenv.lstats.load_avg);
  1051. spin_unlock(&hmp_force_migration);
  1052. }
  1053. static inline void
  1054. hmp_enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
  1055. {
  1056. int cpu = cfs_rq->rq->cpu;
  1057. cfs_rq->avg.loadwop_avg += se->avg.loadwop_avg;
  1058. cfs_rq->avg.loadwop_sum += se->avg.loadwop_sum;
  1059. #ifdef CONFIG_SCHED_HMP_PRIO_FILTER
  1060. if (!task_low_priority(task_of(se)->prio))
  1061. cfs_nr_normal_prio(cpu)++;
  1062. #endif
  1063. trace_sched_cfs_enqueue_task(task_of(se), se_load(se), cpu);
  1064. }
  1065. static inline void
  1066. hmp_dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
  1067. {
  1068. int cpu = cfs_rq->rq->cpu;
  1069. cfs_reset_nr_dequeuing_low_prio(cpu);
  1070. if (!task_low_priority(task_of(se)->prio))
  1071. cfs_nr_normal_prio(cpu)--;
  1072. cfs_rq->avg.loadwop_avg = max_t(long,
  1073. cfs_rq->avg.loadwop_avg - se->avg.loadwop_avg, 0);
  1074. cfs_rq->avg.loadwop_sum = max_t(s64,
  1075. cfs_rq->avg.loadwop_sum - se->avg.loadwop_sum, 0);
  1076. trace_sched_cfs_dequeue_task(task_of(se), se_load(se), cfs_rq->rq->cpu);
  1077. }
  1078. /*
  1079. * hmp_idle_pull looks at little domain runqueues to see
  1080. * if a task should be pulled.
  1081. *
  1082. * Reuses hmp_force_migration spinlock.
  1083. *
  1084. */
  1085. static unsigned int hmp_idle_pull(int this_cpu)
  1086. {
  1087. int cpu;
  1088. struct sched_entity *curr, *orig;
  1089. struct hmp_domain *hmp_domain = NULL;
  1090. struct rq *target = NULL, *rq;
  1091. unsigned long flags, ratio = 0;
  1092. unsigned int moved = 0;
  1093. struct task_struct *p = NULL;
  1094. struct clb_env clbenv;
  1095. struct task_struct *prev_selected = NULL;
  1096. int selected = 0;
  1097. if (!hmp_cpu_is_slowest(this_cpu))
  1098. hmp_domain = hmp_slower_domain(this_cpu);
  1099. if (!hmp_domain)
  1100. return 0;
  1101. if (!spin_trylock(&hmp_force_migration))
  1102. return 0;
  1103. memset(&clbenv, 0, sizeof(clbenv));
  1104. clbenv.flags |= HMP_GB;
  1105. clbenv.btarget = this_cpu;
  1106. cpumask_copy(&clbenv.lcpus, &hmp_domain->possible_cpus);
  1107. cpumask_copy(&clbenv.bcpus, &hmp_cpu_domain(this_cpu)->possible_cpus);
  1108. /* first select a task */
  1109. for_each_cpu(cpu, &hmp_domain->cpus) {
  1110. rq = cpu_rq(cpu);
  1111. raw_spin_lock_irqsave(&rq->lock, flags);
  1112. curr = rq->cfs.curr;
  1113. if (!curr) {
  1114. raw_spin_unlock_irqrestore(&rq->lock, flags);
  1115. continue;
  1116. }
  1117. if (!entity_is_task(curr)) {
  1118. struct cfs_rq *cfs_rq;
  1119. cfs_rq = group_cfs_rq(curr);
  1120. while (cfs_rq) {
  1121. curr = cfs_rq->curr;
  1122. if (!entity_is_task(curr))
  1123. cfs_rq = group_cfs_rq(curr);
  1124. else
  1125. cfs_rq = NULL;
  1126. }
  1127. }
  1128. orig = curr;
  1129. curr = hmp_get_heaviest_task(curr, this_cpu);
  1130. /* check if heaviest eligible task on this
  1131. * CPU is heavier than previous task
  1132. */
  1133. clbenv.ltarget = cpu;
  1134. sched_update_clbstats(&clbenv);
  1135. if (curr && entity_is_task(curr) &&
  1136. (se_load(curr) > clbenv.bstats.threshold) &&
  1137. (se_load(curr) > ratio) &&
  1138. cpumask_test_cpu(this_cpu,
  1139. &task_of(curr)->cpus_allowed)) {
  1140. selected = 1;
  1141. /* get task and selection inside rq lock */
  1142. p = task_of(curr);
  1143. get_task_struct(p);
  1144. target = rq;
  1145. ratio = curr->avg.loadwop_avg;
  1146. }
  1147. raw_spin_unlock_irqrestore(&rq->lock, flags);
  1148. if (selected) {
  1149. if (prev_selected) /* To put task out of rq lock */
  1150. put_task_struct(prev_selected);
  1151. prev_selected = p;
  1152. selected = 0;
  1153. }
  1154. }
  1155. if (!p)
  1156. goto done;
  1157. moved = migrate_running_task(this_cpu, p, target);
  1158. done:
  1159. spin_unlock(&hmp_force_migration);
  1160. if (p)
  1161. put_task_struct(p);
  1162. return moved;
  1163. }
  1164. /* must hold the runqueue lock of the queue se is currently on */
  1165. static const int hmp_max_tasks = 5;
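/*
 * hmp_get_heaviest_task() and hmp_get_lightest_task() below start from the
 * currently running entity and then scan at most hmp_max_tasks entities
 * from the head of the cfs_rq, which bounds the cost of each scan.
 */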
  1166. static struct sched_entity *hmp_get_heaviest_task(
  1167. struct sched_entity *se, int target_cpu)
  1168. {
  1169. int num_tasks = hmp_max_tasks;
  1170. struct sched_entity *max_se = se;
  1171. long int max_ratio = se->avg.loadwop_avg;
  1172. const struct cpumask *hmp_target_mask = NULL;
  1173. struct hmp_domain *hmp;
  1174. if (hmp_cpu_is_fastest(cpu_of(se->cfs_rq->rq)))
  1175. return max_se;
  1176. if (!task_prefer_little(task_of(se))) {
  1177. max_se = se;
  1178. max_ratio = se->avg.loadwop_avg;
  1179. }
  1180. hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq));
  1181. hmp_target_mask = &hmp->cpus;
  1182. if (target_cpu >= 0) {
  1183. /* idle_balance gets run on a CPU while
  1184. * it is in the middle of being hotplugged
  1185. * out. Bail early in that case.
  1186. */
  1187. if (!cpumask_test_cpu(target_cpu, hmp_target_mask))
  1188. return NULL;
  1189. hmp_target_mask = cpumask_of(target_cpu);
  1190. }
  1191. /* The currently running task is not on the runqueue */
  1192. se = __pick_first_entity(cfs_rq_of(se));
  1193. while (num_tasks && se) {
  1194. if (entity_is_task(se) && se->avg.loadwop_avg > max_ratio &&
  1195. cpumask_intersects(hmp_target_mask,
  1196. &task_of(se)->cpus_allowed)) {
  1197. max_se = se;
  1198. max_ratio = se->avg.loadwop_avg;
  1199. }
  1200. se = __pick_next_entity(se);
  1201. num_tasks--;
  1202. }
  1203. return max_se;
  1204. }
  1205. static struct sched_entity *hmp_get_lightest_task(
  1206. struct sched_entity *se, int migrate_down)
  1207. {
  1208. int num_tasks = hmp_max_tasks;
  1209. struct sched_entity *min_se = NULL;
  1210. unsigned long int min_ratio = INT_MAX;
  1211. const struct cpumask *hmp_target_mask = NULL;
  1212. if (migrate_down) {
  1213. struct hmp_domain *hmp;
  1214. if (hmp_cpu_is_slowest(cpu_of(se->cfs_rq->rq)))
  1215. return min_se;
  1216. hmp = hmp_slower_domain(cpu_of(se->cfs_rq->rq));
  1217. hmp_target_mask = &hmp->cpus;
  1218. }
  1219. if (!task_prefer_big(task_of(se))) {
  1220. min_se = se;
  1221. min_ratio = se->avg.loadwop_avg;
  1222. }
  1223. /* The currently running task is not on the runqueue */
  1224. se = __pick_first_entity(cfs_rq_of(se));
  1225. while (num_tasks && se) {
  1226. if (entity_is_task(se) &&
  1227. (se->avg.loadwop_avg < min_ratio
  1228. && hmp_target_mask &&
  1229. cpumask_intersects(hmp_target_mask,
  1230. &task_of(se)->cpus_allowed))) {
  1231. min_se = se;
  1232. min_ratio = se->avg.loadwop_avg;
  1233. }
  1234. se = __pick_next_entity(se);
  1235. num_tasks--;
  1236. }
  1237. return min_se;
  1238. }
  1239. inline int hmp_fork_balance(struct task_struct *p, int prev_cpu)
  1240. {
  1241. int new_cpu = prev_cpu;
  1242. int cpu = smp_processor_id();
  1243. if (hmp_cpu_is_fastest(prev_cpu)) {
  1244. /* prev_cpu is fastest domain */
  1245. struct hmp_domain *hmpdom;
  1246. __always_unused int lowest_ratio;
  1247. hmpdom = list_entry(
  1248. &hmp_cpu_domain(prev_cpu)->hmp_domains,
  1249. struct hmp_domain, hmp_domains);
  1250. lowest_ratio = hmp_domain_min_load(hmpdom, &new_cpu);
  1251. if (new_cpu < nr_cpu_ids &&
  1252. cpumask_test_cpu(new_cpu, &p->cpus_allowed)
  1253. && !cpu_isolated(new_cpu))
  1254. return new_cpu;
  1255. new_cpu = cpumask_any_and(&hmp_faster_domain(cpu)->cpus,
  1256. &p->cpus_allowed);
  1257. if (new_cpu < nr_cpu_ids)
  1258. return new_cpu;
  1259. } else {
  1260. /* prev_cpu is not fastest domain */
  1261. new_cpu = hmp_select_faster_cpu(p, prev_cpu);
  1262. if (new_cpu < nr_cpu_ids)
  1263. return new_cpu;
  1264. }
  1265. return new_cpu;
  1266. }
  1267. #endif
  1268. #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
  1269. #include <linux/cpufreq.h>
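/*
 * Frequency-invariant load tracking hooks: on CPUFREQ_PRECHANGE the
 * transition notifier pushes the new frequency to arch_scale_set_curr_freq()
 * for every CPU in the affected cluster, and the policy notifier keeps the
 * current/max/min frequencies in sync whenever a policy changes.
 */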
  1270. static int cpufreq_callback(struct notifier_block *nb,
  1271. unsigned long val, void *data)
  1272. {
  1273. struct cpufreq_freqs *freq = data;
  1274. int cpu = freq->cpu;
  1275. struct cpumask cls_cpus;
  1276. int id;
  1277. if (freq->flags & CPUFREQ_CONST_LOOPS)
  1278. return NOTIFY_OK;
  1279. if (val == CPUFREQ_PRECHANGE) {
  1280. arch_get_cluster_cpus(&cls_cpus, arch_get_cluster_id(cpu));
  1281. for_each_cpu(id, &cls_cpus)
  1282. arch_scale_set_curr_freq(id, freq->new);
  1283. }
  1284. return NOTIFY_OK;
  1285. }
  1286. static struct notifier_block cpufreq_notifier = {
  1287. .notifier_call = cpufreq_callback,
  1288. };
  1289. static int cpufreq_policy_callback(struct notifier_block *nb,
  1290. unsigned long val, void *data)
  1291. {
  1292. struct cpufreq_policy *policy = data;
  1293. int i;
  1294. if (val != CPUFREQ_NOTIFY)
  1295. return NOTIFY_OK;
  1296. for_each_cpu(i, policy->cpus) {
  1297. arch_scale_set_curr_freq(i, policy->cur);
  1298. arch_scale_set_max_freq(i, policy->max);
  1299. arch_scale_set_min_freq(i, policy->min);
  1300. }
  1301. return NOTIFY_OK;
  1302. }
  1303. static struct notifier_block cpufreq_policy_notifier = {
  1304. .notifier_call = cpufreq_policy_callback,
  1305. };
  1306. static int __init register_cpufreq_notifier(void)
  1307. {
  1308. int ret;
  1309. ret = cpufreq_register_notifier(&cpufreq_notifier,
  1310. CPUFREQ_TRANSITION_NOTIFIER);
  1311. if (ret)
  1312. return ret;
  1313. return cpufreq_register_notifier(&cpufreq_policy_notifier,
  1314. CPUFREQ_POLICY_NOTIFIER);
  1315. }
  1316. core_initcall(register_cpufreq_notifier);
  1317. #endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */