Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 3 Jun 2020 22:13:47 +0000 (15:13 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 3 Jun 2020 22:13:47 +0000 (15:13 -0700)
Pull kvm updates from Paolo Bonzini:
 "ARM:
   - Move the arch-specific code into arch/arm64/kvm

   - Start the post-32bit cleanup

   - Cherry-pick a few non-invasive pre-NV patches

  x86:
   - Rework of TLB flushing

   - Rework of event injection, especially with respect to nested
     virtualization

   - Nested AMD event injection facelift, building on the rework of
     generic code and fixing a lot of corner cases

   - Nested AMD live migration support

   - Optimization for TSC deadline MSR writes and IPIs

   - Various cleanups

   - Asynchronous page fault cleanups (from tglx, common topic branch
     with tip tree)

   - Interrupt-based delivery of asynchronous "page ready" events (host
     side)

   - Hyper-V MSRs and hypercalls for guest debugging

   - VMX preemption timer fixes

  s390:
   - Cleanups

  Generic:
   - Switch vCPU thread wakeup from swait to rcuwait (see the sketch
     after this message)

  The other architectures, and the guest side of the asynchronous page
  fault work, will come next week"
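
  A minimal sketch of the rcuwait wakeup pattern behind the "swait to
  rcuwait" item above. Only the rcuwait_*() helpers, READ_ONCE()/WRITE_ONCE()
  and TASK_INTERRUPTIBLE are real kernel API, mirroring their use in
  vcpu_req_sleep() and kvm_arm_resume_guest() in arch/arm64/kvm/arm.c further
  down; the demo_vcpu type and functions are illustrative names, not code
  from this merge.

	#include <linux/rcuwait.h>
	#include <linux/sched.h>

	struct demo_vcpu {
		struct rcuwait wait;	/* single waiter; stands in for the old swait queue head */
		bool pause;
	};

	static void demo_vcpu_init(struct demo_vcpu *v)
	{
		rcuwait_init(&v->wait);
		v->pause = true;
	}

	/* Waiter (vCPU thread) side: block until the pause flag is cleared. */
	static void demo_vcpu_sleep(struct demo_vcpu *v)
	{
		rcuwait_wait_event(&v->wait, !READ_ONCE(v->pause),
				   TASK_INTERRUPTIBLE);
	}

	/* Waker side: clear the flag first, then wake the blocked task, if any. */
	static void demo_vcpu_resume(struct demo_vcpu *v)
	{
		WRITE_ONCE(v->pause, false);
		rcuwait_wake_up(&v->wait);
	}

  The point of the switch is that rcuwait tracks a single waiting task and
  lets the waker dereference it under RCU, so the wakeup path avoids the
  swait queue lock and the swq_has_sleeper() check that the old code needed.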

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (256 commits)
  KVM: selftests: fix rdtsc() for vmx_tsc_adjust_test
  KVM: check userspace_addr for all memslots
  KVM: selftests: update hyperv_cpuid with SynDBG tests
  x86/kvm/hyper-v: Add support for synthetic debugger via hypercalls
  x86/kvm/hyper-v: enable hypercalls regardless of hypercall page
  x86/kvm/hyper-v: Add support for synthetic debugger interface
  x86/hyper-v: Add synthetic debugger definitions
  KVM: selftests: VMX preemption timer migration test
  KVM: nVMX: Fix VMX preemption timer migration
  x86/kvm/hyper-v: Explicitly align hcall param for kvm_hyperv_exit
  KVM: x86/pmu: Support full width counting
  KVM: x86/pmu: Tweak kvm_pmu_get_msr to pass 'struct msr_data' in
  KVM: x86: announce KVM_FEATURE_ASYNC_PF_INT
  KVM: x86: acknowledgment mechanism for async pf page ready notifications
  KVM: x86: interrupt based APF 'page ready' event delivery
  KVM: introduce kvm_read_guest_offset_cached()
  KVM: rename kvm_arch_can_inject_async_page_present() to kvm_arch_can_dequeue_async_page_present()
  KVM: x86: extend struct kvm_vcpu_pv_apf_data with token info
  Revert "KVM: async_pf: Fix #DF due to inject "Page not Present" and "Page Ready" exceptions simultaneously"
  KVM: VMX: Replace zero-length array with flexible-array
  ...

28 files changed:
Documentation/virt/kvm/api.rst
MAINTAINERS
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_hyp.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/kernel/asm-offsets.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/smp.c
arch/arm64/kvm/arm.c
arch/arm64/kvm/hyp/switch.c
arch/arm64/kvm/reset.c
arch/arm64/kvm/sys_regs.c
arch/arm64/kvm/vgic/vgic-mmio-v3.c
arch/arm64/kvm/vgic/vgic.h
arch/mips/kvm/mips.c
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/include/asm/hyperv-tlfs.h
arch/x86/include/asm/kvm_host.h
arch/x86/kernel/traps.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/mmu/mmu.c
arch/x86/mm/fault.c
include/asm-generic/hyperv-tlfs.h
include/linux/sched.h
include/uapi/linux/kvm.h
kernel/exit.c
kernel/locking/lockdep.c

Simple merge
diff --cc MAINTAINERS
Simple merge
Simple merge
Simple merge
Simple merge
index 3539d7092612760cba39c071e5b3f4417fc6e51f,a27e0cd731e918e3ee91f277dd9df0aa9099cbf3..0577e21422845fa3d5bbe5c5e8bd6fd784e8bbec
@@@ -96,8 -92,11 +96,8 @@@ int main(void
    BLANK();
    DEFINE(CPU_BOOT_STACK,      offsetof(struct secondary_data, stack));
    DEFINE(CPU_BOOT_TASK,               offsetof(struct secondary_data, task));
 -#ifdef CONFIG_ARM64_PTR_AUTH
 -  DEFINE(CPU_BOOT_PTRAUTH_KEY,        offsetof(struct secondary_data, ptrauth_key));
 -#endif
    BLANK();
- #ifdef CONFIG_KVM_ARM_HOST
+ #ifdef CONFIG_KVM
    DEFINE(VCPU_CONTEXT,                offsetof(struct kvm_vcpu, arch.ctxt));
    DEFINE(VCPU_FAULT_DISR,     offsetof(struct kvm_vcpu, arch.fault.disr_el1));
    DEFINE(VCPU_WORKAROUND_FLAGS,       offsetof(struct kvm_vcpu, arch.workaround_flags));
Simple merge
Simple merge
index 0000000000000000000000000000000000000000,b5282943b85490bb7fd76619cd31105d7562d273..7a57381c05e8fc046472f1e1cf1fdbe3d8fc2ac8
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,1712 +1,1710 @@@
 -      kvm_set_ipa_limit();
 -
 -      return 0;
+ // SPDX-License-Identifier: GPL-2.0-only
+ /*
+  * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+  * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+  */
+ #include <linux/bug.h>
+ #include <linux/cpu_pm.h>
+ #include <linux/errno.h>
+ #include <linux/err.h>
+ #include <linux/kvm_host.h>
+ #include <linux/list.h>
+ #include <linux/module.h>
+ #include <linux/vmalloc.h>
+ #include <linux/fs.h>
+ #include <linux/mman.h>
+ #include <linux/sched.h>
+ #include <linux/kvm.h>
+ #include <linux/kvm_irqfd.h>
+ #include <linux/irqbypass.h>
+ #include <linux/sched/stat.h>
+ #include <trace/events/kvm.h>
+ #define CREATE_TRACE_POINTS
+ #include "trace_arm.h"
+ #include <linux/uaccess.h>
+ #include <asm/ptrace.h>
+ #include <asm/mman.h>
+ #include <asm/tlbflush.h>
+ #include <asm/cacheflush.h>
+ #include <asm/cpufeature.h>
+ #include <asm/virt.h>
+ #include <asm/kvm_arm.h>
+ #include <asm/kvm_asm.h>
+ #include <asm/kvm_mmu.h>
+ #include <asm/kvm_emulate.h>
+ #include <asm/kvm_coproc.h>
+ #include <asm/sections.h>
+ #include <kvm/arm_hypercalls.h>
+ #include <kvm/arm_pmu.h>
+ #include <kvm/arm_psci.h>
+ #ifdef REQUIRES_VIRT
+ __asm__(".arch_extension      virt");
+ #endif
+ DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data);
+ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+ /* The VMID used in the VTTBR */
+ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
+ static u32 kvm_next_vmid;
+ static DEFINE_SPINLOCK(kvm_vmid_lock);
+ static bool vgic_present;
+ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
+ DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+ {
+       return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
+ }
+ int kvm_arch_hardware_setup(void *opaque)
+ {
+       return 0;
+ }
+ int kvm_arch_check_processor_compat(void *opaque)
+ {
+       return 0;
+ }
+ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+                           struct kvm_enable_cap *cap)
+ {
+       int r;
+       if (cap->flags)
+               return -EINVAL;
+       switch (cap->cap) {
+       case KVM_CAP_ARM_NISV_TO_USER:
+               r = 0;
+               kvm->arch.return_nisv_io_abort_to_user = true;
+               break;
+       default:
+               r = -EINVAL;
+               break;
+       }
+       return r;
+ }
+ static int kvm_arm_default_max_vcpus(void)
+ {
+       return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
+ }
+ /**
+  * kvm_arch_init_vm - initializes a VM data structure
+  * @kvm:      pointer to the KVM struct
+  */
+ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+ {
+       int ret, cpu;
+       ret = kvm_arm_setup_stage2(kvm, type);
+       if (ret)
+               return ret;
+       kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
+       if (!kvm->arch.last_vcpu_ran)
+               return -ENOMEM;
+       for_each_possible_cpu(cpu)
+               *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
+       ret = kvm_alloc_stage2_pgd(kvm);
+       if (ret)
+               goto out_fail_alloc;
+       ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
+       if (ret)
+               goto out_free_stage2_pgd;
+       kvm_vgic_early_init(kvm);
+       /* Mark the initial VMID generation invalid */
+       kvm->arch.vmid.vmid_gen = 0;
+       /* The maximum number of VCPUs is limited by the host's GIC model */
+       kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
+       return ret;
+ out_free_stage2_pgd:
+       kvm_free_stage2_pgd(kvm);
+ out_fail_alloc:
+       free_percpu(kvm->arch.last_vcpu_ran);
+       kvm->arch.last_vcpu_ran = NULL;
+       return ret;
+ }
+ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
+ {
+       return 0;
+ }
+ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+ {
+       return VM_FAULT_SIGBUS;
+ }
+ /**
+  * kvm_arch_destroy_vm - destroy the VM data structure
+  * @kvm:      pointer to the KVM struct
+  */
+ void kvm_arch_destroy_vm(struct kvm *kvm)
+ {
+       int i;
+       kvm_vgic_destroy(kvm);
+       free_percpu(kvm->arch.last_vcpu_ran);
+       kvm->arch.last_vcpu_ran = NULL;
+       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+               if (kvm->vcpus[i]) {
+                       kvm_vcpu_destroy(kvm->vcpus[i]);
+                       kvm->vcpus[i] = NULL;
+               }
+       }
+       atomic_set(&kvm->online_vcpus, 0);
+ }
+ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+ {
+       int r;
+       switch (ext) {
+       case KVM_CAP_IRQCHIP:
+               r = vgic_present;
+               break;
+       case KVM_CAP_IOEVENTFD:
+       case KVM_CAP_DEVICE_CTRL:
+       case KVM_CAP_USER_MEMORY:
+       case KVM_CAP_SYNC_MMU:
+       case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+       case KVM_CAP_ONE_REG:
+       case KVM_CAP_ARM_PSCI:
+       case KVM_CAP_ARM_PSCI_0_2:
+       case KVM_CAP_READONLY_MEM:
+       case KVM_CAP_MP_STATE:
+       case KVM_CAP_IMMEDIATE_EXIT:
+       case KVM_CAP_VCPU_EVENTS:
+       case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
+       case KVM_CAP_ARM_NISV_TO_USER:
+       case KVM_CAP_ARM_INJECT_EXT_DABT:
+               r = 1;
+               break;
+       case KVM_CAP_ARM_SET_DEVICE_ADDR:
+               r = 1;
+               break;
+       case KVM_CAP_NR_VCPUS:
+               r = num_online_cpus();
+               break;
+       case KVM_CAP_MAX_VCPUS:
+       case KVM_CAP_MAX_VCPU_ID:
+               if (kvm)
+                       r = kvm->arch.max_vcpus;
+               else
+                       r = kvm_arm_default_max_vcpus();
+               break;
+       case KVM_CAP_MSI_DEVID:
+               if (!kvm)
+                       r = -EINVAL;
+               else
+                       r = kvm->arch.vgic.msis_require_devid;
+               break;
+       case KVM_CAP_ARM_USER_IRQ:
+               /*
+                * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
+                * (bump this number if adding more devices)
+                */
+               r = 1;
+               break;
+       default:
+               r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
+               break;
+       }
+       return r;
+ }
+ long kvm_arch_dev_ioctl(struct file *filp,
+                       unsigned int ioctl, unsigned long arg)
+ {
+       return -EINVAL;
+ }
+ struct kvm *kvm_arch_alloc_vm(void)
+ {
+       if (!has_vhe())
+               return kzalloc(sizeof(struct kvm), GFP_KERNEL);
+       return vzalloc(sizeof(struct kvm));
+ }
+ void kvm_arch_free_vm(struct kvm *kvm)
+ {
+       if (!has_vhe())
+               kfree(kvm);
+       else
+               vfree(kvm);
+ }
+ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+ {
+       if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+               return -EBUSY;
+       if (id >= kvm->arch.max_vcpus)
+               return -EINVAL;
+       return 0;
+ }
+ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+ {
+       int err;
+       /* Force users to call KVM_ARM_VCPU_INIT */
+       vcpu->arch.target = -1;
+       bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
+       /* Set up the timer */
+       kvm_timer_vcpu_init(vcpu);
+       kvm_pmu_vcpu_init(vcpu);
+       kvm_arm_reset_debug_ptr(vcpu);
+       kvm_arm_pvtime_vcpu_init(&vcpu->arch);
+       err = kvm_vgic_vcpu_init(vcpu);
+       if (err)
+               return err;
+       return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
+ }
+ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+ {
+ }
+ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+ {
+       if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
+               static_branch_dec(&userspace_irqchip_in_use);
+       kvm_mmu_free_memory_caches(vcpu);
+       kvm_timer_vcpu_terminate(vcpu);
+       kvm_pmu_vcpu_destroy(vcpu);
+       kvm_arm_vcpu_destroy(vcpu);
+ }
+ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+ {
+       return kvm_timer_is_pending(vcpu);
+ }
+ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
+ {
+       /*
+        * If we're about to block (most likely because we've just hit a
+        * WFI), we need to sync back the state of the GIC CPU interface
+        * so that we have the latest PMR and group enables. This ensures
+        * that kvm_arch_vcpu_runnable has up-to-date data to decide
+        * whether we have pending interrupts.
+        *
+        * For the same reason, we want to tell GICv4 that we need
+        * doorbells to be signalled, should an interrupt become pending.
+        */
+       preempt_disable();
+       kvm_vgic_vmcr_sync(vcpu);
+       vgic_v4_put(vcpu, true);
+       preempt_enable();
+ }
+ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
+ {
+       preempt_disable();
+       vgic_v4_load(vcpu);
+       preempt_enable();
+ }
+ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ {
+       int *last_ran;
+       kvm_host_data_t *cpu_data;
+       last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
+       cpu_data = this_cpu_ptr(&kvm_host_data);
+       /*
+        * We might get preempted before the vCPU actually runs, but
+        * over-invalidation doesn't affect correctness.
+        */
+       if (*last_ran != vcpu->vcpu_id) {
+               kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
+               *last_ran = vcpu->vcpu_id;
+       }
+       vcpu->cpu = cpu;
+       vcpu->arch.host_cpu_context = &cpu_data->host_ctxt;
+       kvm_vgic_load(vcpu);
+       kvm_timer_vcpu_load(vcpu);
+       kvm_vcpu_load_sysregs(vcpu);
+       kvm_arch_vcpu_load_fp(vcpu);
+       kvm_vcpu_pmu_restore_guest(vcpu);
+       if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
+               kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
+       if (single_task_running())
+               vcpu_clear_wfx_traps(vcpu);
+       else
+               vcpu_set_wfx_traps(vcpu);
+       vcpu_ptrauth_setup_lazy(vcpu);
+ }
+ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+ {
+       kvm_arch_vcpu_put_fp(vcpu);
+       kvm_vcpu_put_sysregs(vcpu);
+       kvm_timer_vcpu_put(vcpu);
+       kvm_vgic_put(vcpu);
+       kvm_vcpu_pmu_restore_host(vcpu);
+       vcpu->cpu = -1;
+ }
+ static void vcpu_power_off(struct kvm_vcpu *vcpu)
+ {
+       vcpu->arch.power_off = true;
+       kvm_make_request(KVM_REQ_SLEEP, vcpu);
+       kvm_vcpu_kick(vcpu);
+ }
+ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+                                   struct kvm_mp_state *mp_state)
+ {
+       if (vcpu->arch.power_off)
+               mp_state->mp_state = KVM_MP_STATE_STOPPED;
+       else
+               mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+       return 0;
+ }
+ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+                                   struct kvm_mp_state *mp_state)
+ {
+       int ret = 0;
+       switch (mp_state->mp_state) {
+       case KVM_MP_STATE_RUNNABLE:
+               vcpu->arch.power_off = false;
+               break;
+       case KVM_MP_STATE_STOPPED:
+               vcpu_power_off(vcpu);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+       return ret;
+ }
+ /**
+  * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
+  * @v:                The VCPU pointer
+  *
+  * If the guest CPU is not waiting for interrupts or an interrupt line is
+  * asserted, the CPU is by definition runnable.
+  */
+ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
+ {
+       bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
+       return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
+               && !v->arch.power_off && !v->arch.pause);
+ }
+ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+ {
+       return vcpu_mode_priv(vcpu);
+ }
+ /* Just ensure a guest exit from a particular CPU */
+ static void exit_vm_noop(void *info)
+ {
+ }
+ void force_vm_exit(const cpumask_t *mask)
+ {
+       preempt_disable();
+       smp_call_function_many(mask, exit_vm_noop, NULL, true);
+       preempt_enable();
+ }
+ /**
+  * need_new_vmid_gen - check that the VMID is still valid
+  * @vmid: The VMID to check
+  *
+  * return true if there is a new generation of VMIDs being used
+  *
+  * The hardware supports a limited set of values with the value zero reserved
+  * for the host, so we check if an assigned value belongs to a previous
+  * generation, which requires us to assign a new value. If we're the first to
+  * use a VMID for the new generation, we must flush necessary caches and TLBs
+  * on all CPUs.
+  */
+ static bool need_new_vmid_gen(struct kvm_vmid *vmid)
+ {
+       u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
+       smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
+       return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
+ }
+ /**
+  * update_vmid - Update the vmid with a valid VMID for the current generation
+  * @kvm: The guest that struct vmid belongs to
+  * @vmid: The stage-2 VMID information struct
+  */
+ static void update_vmid(struct kvm_vmid *vmid)
+ {
+       if (!need_new_vmid_gen(vmid))
+               return;
+       spin_lock(&kvm_vmid_lock);
+       /*
+        * We need to re-check the vmid_gen here to ensure that if another vcpu
+        * already allocated a valid vmid for this vm, then this vcpu should
+        * use the same vmid.
+        */
+       if (!need_new_vmid_gen(vmid)) {
+               spin_unlock(&kvm_vmid_lock);
+               return;
+       }
+       /* First user of a new VMID generation? */
+       if (unlikely(kvm_next_vmid == 0)) {
+               atomic64_inc(&kvm_vmid_gen);
+               kvm_next_vmid = 1;
+               /*
+                * On SMP we know no other CPUs can use this CPU's or each
+                * other's VMID after force_vm_exit returns since the
+                * kvm_vmid_lock blocks them from reentry to the guest.
+                */
+               force_vm_exit(cpu_all_mask);
+               /*
+                * Now broadcast TLB + ICACHE invalidation over the inner
+                * shareable domain to make sure all data structures are
+                * clean.
+                */
+               kvm_call_hyp(__kvm_flush_vm_context);
+       }
+       vmid->vmid = kvm_next_vmid;
+       kvm_next_vmid++;
+       kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
+       smp_wmb();
+       WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
+       spin_unlock(&kvm_vmid_lock);
+ }
+ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
+ {
+       struct kvm *kvm = vcpu->kvm;
+       int ret = 0;
+       if (likely(vcpu->arch.has_run_once))
+               return 0;
+       if (!kvm_arm_vcpu_is_finalized(vcpu))
+               return -EPERM;
+       vcpu->arch.has_run_once = true;
+       if (likely(irqchip_in_kernel(kvm))) {
+               /*
+                * Map the VGIC hardware resources before running a vcpu the
+                * first time on this VM.
+                */
+               if (unlikely(!vgic_ready(kvm))) {
+                       ret = kvm_vgic_map_resources(kvm);
+                       if (ret)
+                               return ret;
+               }
+       } else {
+               /*
+                * Tell the rest of the code that there are userspace irqchip
+                * VMs in the wild.
+                */
+               static_branch_inc(&userspace_irqchip_in_use);
+       }
+       ret = kvm_timer_enable(vcpu);
+       if (ret)
+               return ret;
+       ret = kvm_arm_pmu_v3_enable(vcpu);
+       return ret;
+ }
+ bool kvm_arch_intc_initialized(struct kvm *kvm)
+ {
+       return vgic_initialized(kvm);
+ }
+ void kvm_arm_halt_guest(struct kvm *kvm)
+ {
+       int i;
+       struct kvm_vcpu *vcpu;
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               vcpu->arch.pause = true;
+       kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
+ }
+ void kvm_arm_resume_guest(struct kvm *kvm)
+ {
+       int i;
+       struct kvm_vcpu *vcpu;
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               vcpu->arch.pause = false;
+               rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
+       }
+ }
+ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
+ {
+       struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
+       rcuwait_wait_event(wait,
+                          (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
+                          TASK_INTERRUPTIBLE);
+       if (vcpu->arch.power_off || vcpu->arch.pause) {
+               /* Awaken to handle a signal, request we sleep again later. */
+               kvm_make_request(KVM_REQ_SLEEP, vcpu);
+       }
+       /*
+        * Make sure we will observe a potential reset request if we've
+        * observed a change to the power state. Pairs with the smp_wmb() in
+        * kvm_psci_vcpu_on().
+        */
+       smp_rmb();
+ }
+ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
+ {
+       return vcpu->arch.target >= 0;
+ }
+ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
+ {
+       if (kvm_request_pending(vcpu)) {
+               if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
+                       vcpu_req_sleep(vcpu);
+               if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
+                       kvm_reset_vcpu(vcpu);
+               /*
+                * Clear IRQ_PENDING requests that were made to guarantee
+                * that a VCPU sees new virtual interrupts.
+                */
+               kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
+               if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
+                       kvm_update_stolen_time(vcpu);
+               if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
+                       /* The distributor enable bits were changed */
+                       preempt_disable();
+                       vgic_v4_put(vcpu, false);
+                       vgic_v4_load(vcpu);
+                       preempt_enable();
+               }
+       }
+ }
+ /**
+  * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
+  * @vcpu:     The VCPU pointer
+  *
+  * This function is called through the VCPU_RUN ioctl called from user space. It
+  * will execute VM code in a loop until the time slice for the process is used
+  * or some emulation is needed from user space in which case the function will
+  * return with return value 0 and with the kvm_run structure filled in with the
+  * required data for the requested emulation.
+  */
+ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+ {
+       struct kvm_run *run = vcpu->run;
+       int ret;
+       if (unlikely(!kvm_vcpu_initialized(vcpu)))
+               return -ENOEXEC;
+       ret = kvm_vcpu_first_run_init(vcpu);
+       if (ret)
+               return ret;
+       if (run->exit_reason == KVM_EXIT_MMIO) {
+               ret = kvm_handle_mmio_return(vcpu, run);
+               if (ret)
+                       return ret;
+       }
+       if (run->immediate_exit)
+               return -EINTR;
+       vcpu_load(vcpu);
+       kvm_sigset_activate(vcpu);
+       ret = 1;
+       run->exit_reason = KVM_EXIT_UNKNOWN;
+       while (ret > 0) {
+               /*
+                * Check conditions before entering the guest
+                */
+               cond_resched();
+               update_vmid(&vcpu->kvm->arch.vmid);
+               check_vcpu_requests(vcpu);
+               /*
+                * Preparing the interrupts to be injected also
+                * involves poking the GIC, which must be done in a
+                * non-preemptible context.
+                */
+               preempt_disable();
+               kvm_pmu_flush_hwstate(vcpu);
+               local_irq_disable();
+               kvm_vgic_flush_hwstate(vcpu);
+               /*
+                * Exit if we have a signal pending so that we can deliver the
+                * signal to user space.
+                */
+               if (signal_pending(current)) {
+                       ret = -EINTR;
+                       run->exit_reason = KVM_EXIT_INTR;
+               }
+               /*
+                * If we're using a userspace irqchip, then check if we need
+                * to tell a userspace irqchip about timer or PMU level
+                * changes and if so, exit to userspace (the actual level
+                * state gets updated in kvm_timer_update_run and
+                * kvm_pmu_update_run below).
+                */
+               if (static_branch_unlikely(&userspace_irqchip_in_use)) {
+                       if (kvm_timer_should_notify_user(vcpu) ||
+                           kvm_pmu_should_notify_user(vcpu)) {
+                               ret = -EINTR;
+                               run->exit_reason = KVM_EXIT_INTR;
+                       }
+               }
+               /*
+                * Ensure we set mode to IN_GUEST_MODE after we disable
+                * interrupts and before the final VCPU requests check.
+                * See the comment in kvm_vcpu_exiting_guest_mode() and
+                * Documentation/virt/kvm/vcpu-requests.rst
+                */
+               smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+               if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
+                   kvm_request_pending(vcpu)) {
+                       vcpu->mode = OUTSIDE_GUEST_MODE;
+                       isb(); /* Ensure work in x_flush_hwstate is committed */
+                       kvm_pmu_sync_hwstate(vcpu);
+                       if (static_branch_unlikely(&userspace_irqchip_in_use))
+                               kvm_timer_sync_hwstate(vcpu);
+                       kvm_vgic_sync_hwstate(vcpu);
+                       local_irq_enable();
+                       preempt_enable();
+                       continue;
+               }
+               kvm_arm_setup_debug(vcpu);
+               /**************************************************************
+                * Enter the guest
+                */
+               trace_kvm_entry(*vcpu_pc(vcpu));
+               guest_enter_irqoff();
+               if (has_vhe()) {
+                       ret = kvm_vcpu_run_vhe(vcpu);
+               } else {
+                       ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
+               }
+               vcpu->mode = OUTSIDE_GUEST_MODE;
+               vcpu->stat.exits++;
+               /*
+                * Back from guest
+                *************************************************************/
+               kvm_arm_clear_debug(vcpu);
+               /*
+                * We must sync the PMU state before the vgic state so
+                * that the vgic can properly sample the updated state of the
+                * interrupt line.
+                */
+               kvm_pmu_sync_hwstate(vcpu);
+               /*
+                * Sync the vgic state before syncing the timer state because
+                * the timer code needs to know if the virtual timer
+                * interrupts are active.
+                */
+               kvm_vgic_sync_hwstate(vcpu);
+               /*
+                * Sync the timer hardware state before enabling interrupts as
+                * we don't want vtimer interrupts to race with syncing the
+                * timer virtual interrupt state.
+                */
+               if (static_branch_unlikely(&userspace_irqchip_in_use))
+                       kvm_timer_sync_hwstate(vcpu);
+               kvm_arch_vcpu_ctxsync_fp(vcpu);
+               /*
+                * We may have taken a host interrupt in HYP mode (ie
+                * while executing the guest). This interrupt is still
+                * pending, as we haven't serviced it yet!
+                *
+                * We're now back in SVC mode, with interrupts
+                * disabled.  Enabling the interrupts now will have
+                * the effect of taking the interrupt again, in SVC
+                * mode this time.
+                */
+               local_irq_enable();
+               /*
+                * We do local_irq_enable() before calling guest_exit() so
+                * that if a timer interrupt hits while running the guest we
+                * account that tick as being spent in the guest.  We enable
+                * preemption after calling guest_exit() so that if we get
+                * preempted we make sure ticks after that is not counted as
+                * guest time.
+                */
+               guest_exit();
+               trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+               /* Exit types that need handling before we can be preempted */
+               handle_exit_early(vcpu, run, ret);
+               preempt_enable();
+               ret = handle_exit(vcpu, run, ret);
+       }
+       /* Tell userspace about in-kernel device output levels */
+       if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
+               kvm_timer_update_run(vcpu);
+               kvm_pmu_update_run(vcpu);
+       }
+       kvm_sigset_deactivate(vcpu);
+       vcpu_put(vcpu);
+       return ret;
+ }
+ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
+ {
+       int bit_index;
+       bool set;
+       unsigned long *hcr;
+       if (number == KVM_ARM_IRQ_CPU_IRQ)
+               bit_index = __ffs(HCR_VI);
+       else /* KVM_ARM_IRQ_CPU_FIQ */
+               bit_index = __ffs(HCR_VF);
+       hcr = vcpu_hcr(vcpu);
+       if (level)
+               set = test_and_set_bit(bit_index, hcr);
+       else
+               set = test_and_clear_bit(bit_index, hcr);
+       /*
+        * If we didn't change anything, no need to wake up or kick other CPUs
+        */
+       if (set == level)
+               return 0;
+       /*
+        * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
+        * trigger a world-switch round on the running physical CPU to set the
+        * virtual IRQ/FIQ fields in the HCR appropriately.
+        */
+       kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
+       kvm_vcpu_kick(vcpu);
+       return 0;
+ }
+ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
+                         bool line_status)
+ {
+       u32 irq = irq_level->irq;
+       unsigned int irq_type, vcpu_idx, irq_num;
+       int nrcpus = atomic_read(&kvm->online_vcpus);
+       struct kvm_vcpu *vcpu = NULL;
+       bool level = irq_level->level;
+       irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
+       vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
+       vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
+       irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
+       trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
+       switch (irq_type) {
+       case KVM_ARM_IRQ_TYPE_CPU:
+               if (irqchip_in_kernel(kvm))
+                       return -ENXIO;
+               if (vcpu_idx >= nrcpus)
+                       return -EINVAL;
+               vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+               if (!vcpu)
+                       return -EINVAL;
+               if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
+                       return -EINVAL;
+               return vcpu_interrupt_line(vcpu, irq_num, level);
+       case KVM_ARM_IRQ_TYPE_PPI:
+               if (!irqchip_in_kernel(kvm))
+                       return -ENXIO;
+               if (vcpu_idx >= nrcpus)
+                       return -EINVAL;
+               vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+               if (!vcpu)
+                       return -EINVAL;
+               if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
+                       return -EINVAL;
+               return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
+       case KVM_ARM_IRQ_TYPE_SPI:
+               if (!irqchip_in_kernel(kvm))
+                       return -ENXIO;
+               if (irq_num < VGIC_NR_PRIVATE_IRQS)
+                       return -EINVAL;
+               return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
+       }
+       return -EINVAL;
+ }
+ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+                              const struct kvm_vcpu_init *init)
+ {
+       unsigned int i, ret;
+       int phys_target = kvm_target_cpu();
+       if (init->target != phys_target)
+               return -EINVAL;
+       /*
+        * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+        * use the same target.
+        */
+       if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
+               return -EINVAL;
+       /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+       for (i = 0; i < sizeof(init->features) * 8; i++) {
+               bool set = (init->features[i / 32] & (1 << (i % 32)));
+               if (set && i >= KVM_VCPU_MAX_FEATURES)
+                       return -ENOENT;
+               /*
+                * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+                * use the same feature set.
+                */
+               if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
+                   test_bit(i, vcpu->arch.features) != set)
+                       return -EINVAL;
+               if (set)
+                       set_bit(i, vcpu->arch.features);
+       }
+       vcpu->arch.target = phys_target;
+       /* Now we know what it is, we can reset it. */
+       ret = kvm_reset_vcpu(vcpu);
+       if (ret) {
+               vcpu->arch.target = -1;
+               bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
+       }
+       return ret;
+ }
+ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
+                                        struct kvm_vcpu_init *init)
+ {
+       int ret;
+       ret = kvm_vcpu_set_target(vcpu, init);
+       if (ret)
+               return ret;
+       /*
+        * Ensure a rebooted VM will fault in RAM pages and detect if the
+        * guest MMU is turned off and flush the caches as needed.
+        *
+        * S2FWB enforces all memory accesses to RAM being cacheable, we
+        * ensure that the cache is always coherent.
+        */
+       if (vcpu->arch.has_run_once && !cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+               stage2_unmap_vm(vcpu->kvm);
+       vcpu_reset_hcr(vcpu);
+       /*
+        * Handle the "start in power-off" case.
+        */
+       if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+               vcpu_power_off(vcpu);
+       else
+               vcpu->arch.power_off = false;
+       return 0;
+ }
+ static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
+                                struct kvm_device_attr *attr)
+ {
+       int ret = -ENXIO;
+       switch (attr->group) {
+       default:
+               ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
+               break;
+       }
+       return ret;
+ }
+ static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
+                                struct kvm_device_attr *attr)
+ {
+       int ret = -ENXIO;
+       switch (attr->group) {
+       default:
+               ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
+               break;
+       }
+       return ret;
+ }
+ static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
+                                struct kvm_device_attr *attr)
+ {
+       int ret = -ENXIO;
+       switch (attr->group) {
+       default:
+               ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
+               break;
+       }
+       return ret;
+ }
+ static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+                                  struct kvm_vcpu_events *events)
+ {
+       memset(events, 0, sizeof(*events));
+       return __kvm_arm_vcpu_get_events(vcpu, events);
+ }
+ static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+                                  struct kvm_vcpu_events *events)
+ {
+       int i;
+       /* check whether the reserved field is zero */
+       for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
+               if (events->reserved[i])
+                       return -EINVAL;
+       /* check whether the pad field is zero */
+       for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
+               if (events->exception.pad[i])
+                       return -EINVAL;
+       return __kvm_arm_vcpu_set_events(vcpu, events);
+ }
+ long kvm_arch_vcpu_ioctl(struct file *filp,
+                        unsigned int ioctl, unsigned long arg)
+ {
+       struct kvm_vcpu *vcpu = filp->private_data;
+       void __user *argp = (void __user *)arg;
+       struct kvm_device_attr attr;
+       long r;
+       switch (ioctl) {
+       case KVM_ARM_VCPU_INIT: {
+               struct kvm_vcpu_init init;
+               r = -EFAULT;
+               if (copy_from_user(&init, argp, sizeof(init)))
+                       break;
+               r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
+               break;
+       }
+       case KVM_SET_ONE_REG:
+       case KVM_GET_ONE_REG: {
+               struct kvm_one_reg reg;
+               r = -ENOEXEC;
+               if (unlikely(!kvm_vcpu_initialized(vcpu)))
+                       break;
+               r = -EFAULT;
+               if (copy_from_user(&reg, argp, sizeof(reg)))
+                       break;
+               if (ioctl == KVM_SET_ONE_REG)
+                       r = kvm_arm_set_reg(vcpu, &reg);
+               else
+                       r = kvm_arm_get_reg(vcpu, &reg);
+               break;
+       }
+       case KVM_GET_REG_LIST: {
+               struct kvm_reg_list __user *user_list = argp;
+               struct kvm_reg_list reg_list;
+               unsigned n;
+               r = -ENOEXEC;
+               if (unlikely(!kvm_vcpu_initialized(vcpu)))
+                       break;
+               r = -EPERM;
+               if (!kvm_arm_vcpu_is_finalized(vcpu))
+                       break;
+               r = -EFAULT;
+               if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
+                       break;
+               n = reg_list.n;
+               reg_list.n = kvm_arm_num_regs(vcpu);
+               if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
+                       break;
+               r = -E2BIG;
+               if (n < reg_list.n)
+                       break;
+               r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
+               break;
+       }
+       case KVM_SET_DEVICE_ATTR: {
+               r = -EFAULT;
+               if (copy_from_user(&attr, argp, sizeof(attr)))
+                       break;
+               r = kvm_arm_vcpu_set_attr(vcpu, &attr);
+               break;
+       }
+       case KVM_GET_DEVICE_ATTR: {
+               r = -EFAULT;
+               if (copy_from_user(&attr, argp, sizeof(attr)))
+                       break;
+               r = kvm_arm_vcpu_get_attr(vcpu, &attr);
+               break;
+       }
+       case KVM_HAS_DEVICE_ATTR: {
+               r = -EFAULT;
+               if (copy_from_user(&attr, argp, sizeof(attr)))
+                       break;
+               r = kvm_arm_vcpu_has_attr(vcpu, &attr);
+               break;
+       }
+       case KVM_GET_VCPU_EVENTS: {
+               struct kvm_vcpu_events events;
+               if (kvm_arm_vcpu_get_events(vcpu, &events))
+                       return -EINVAL;
+               if (copy_to_user(argp, &events, sizeof(events)))
+                       return -EFAULT;
+               return 0;
+       }
+       case KVM_SET_VCPU_EVENTS: {
+               struct kvm_vcpu_events events;
+               if (copy_from_user(&events, argp, sizeof(events)))
+                       return -EFAULT;
+               return kvm_arm_vcpu_set_events(vcpu, &events);
+       }
+       case KVM_ARM_VCPU_FINALIZE: {
+               int what;
+               if (!kvm_vcpu_initialized(vcpu))
+                       return -ENOEXEC;
+               if (get_user(what, (const int __user *)argp))
+                       return -EFAULT;
+               return kvm_arm_vcpu_finalize(vcpu, what);
+       }
+       default:
+               r = -EINVAL;
+       }
+       return r;
+ }
+ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+ {
+ }
+ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
+                                       struct kvm_memory_slot *memslot)
+ {
+       kvm_flush_remote_tlbs(kvm);
+ }
+ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
+                                       struct kvm_arm_device_addr *dev_addr)
+ {
+       unsigned long dev_id, type;
+       dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
+               KVM_ARM_DEVICE_ID_SHIFT;
+       type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
+               KVM_ARM_DEVICE_TYPE_SHIFT;
+       switch (dev_id) {
+       case KVM_ARM_DEVICE_VGIC_V2:
+               if (!vgic_present)
+                       return -ENXIO;
+               return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
+       default:
+               return -ENODEV;
+       }
+ }
+ long kvm_arch_vm_ioctl(struct file *filp,
+                      unsigned int ioctl, unsigned long arg)
+ {
+       struct kvm *kvm = filp->private_data;
+       void __user *argp = (void __user *)arg;
+       switch (ioctl) {
+       case KVM_CREATE_IRQCHIP: {
+               int ret;
+               if (!vgic_present)
+                       return -ENXIO;
+               mutex_lock(&kvm->lock);
+               ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+               mutex_unlock(&kvm->lock);
+               return ret;
+       }
+       case KVM_ARM_SET_DEVICE_ADDR: {
+               struct kvm_arm_device_addr dev_addr;
+               if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
+                       return -EFAULT;
+               return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
+       }
+       case KVM_ARM_PREFERRED_TARGET: {
+               int err;
+               struct kvm_vcpu_init init;
+               err = kvm_vcpu_preferred_target(&init);
+               if (err)
+                       return err;
+               if (copy_to_user(argp, &init, sizeof(init)))
+                       return -EFAULT;
+               return 0;
+       }
+       default:
+               return -EINVAL;
+       }
+ }
+ static void cpu_init_hyp_mode(void)
+ {
+       phys_addr_t pgd_ptr;
+       unsigned long hyp_stack_ptr;
+       unsigned long vector_ptr;
+       unsigned long tpidr_el2;
+       /* Switch from the HYP stub to our own HYP init vector */
+       __hyp_set_vectors(kvm_get_idmap_vector());
+       /*
+        * Calculate the raw per-cpu offset without a translation from the
+        * kernel's mapping to the linear mapping, and store it in tpidr_el2
+        * so that we can use adr_l to access per-cpu variables in EL2.
+        */
+       tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) -
+                    (unsigned long)kvm_ksym_ref(kvm_host_data));
+       pgd_ptr = kvm_mmu_get_httbr();
+       hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE;
+       vector_ptr = (unsigned long)kvm_get_hyp_vector();
+       /*
+        * Call initialization code, and switch to the full blown HYP code.
+        * If the cpucaps haven't been finalized yet, something has gone very
+        * wrong, and hyp will crash and burn when it uses any
+        * cpus_have_const_cap() wrapper.
+        */
+       BUG_ON(!system_capabilities_finalized());
+       __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
+       /*
+        * Disabling SSBD on a non-VHE system requires us to enable SSBS
+        * at EL2.
+        */
+       if (this_cpu_has_cap(ARM64_SSBS) &&
+           arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
+               kvm_call_hyp(__kvm_enable_ssbs);
+       }
+ }
+ static void cpu_hyp_reset(void)
+ {
+       if (!is_kernel_in_hyp_mode())
+               __hyp_reset_vectors();
+ }
+ static void cpu_hyp_reinit(void)
+ {
+       kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt);
+       cpu_hyp_reset();
+       if (is_kernel_in_hyp_mode())
+               kvm_timer_init_vhe();
+       else
+               cpu_init_hyp_mode();
+       kvm_arm_init_debug();
+       if (vgic_present)
+               kvm_vgic_init_cpu_hardware();
+ }
+ static void _kvm_arch_hardware_enable(void *discard)
+ {
+       if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
+               cpu_hyp_reinit();
+               __this_cpu_write(kvm_arm_hardware_enabled, 1);
+       }
+ }
+ int kvm_arch_hardware_enable(void)
+ {
+       _kvm_arch_hardware_enable(NULL);
+       return 0;
+ }
+ static void _kvm_arch_hardware_disable(void *discard)
+ {
+       if (__this_cpu_read(kvm_arm_hardware_enabled)) {
+               cpu_hyp_reset();
+               __this_cpu_write(kvm_arm_hardware_enabled, 0);
+       }
+ }
+ void kvm_arch_hardware_disable(void)
+ {
+       _kvm_arch_hardware_disable(NULL);
+ }
+ #ifdef CONFIG_CPU_PM
+ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
+                                   unsigned long cmd,
+                                   void *v)
+ {
+       /*
+        * kvm_arm_hardware_enabled is left with its old value over
+        * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
+        * re-enable hyp.
+        */
+       switch (cmd) {
+       case CPU_PM_ENTER:
+               if (__this_cpu_read(kvm_arm_hardware_enabled))
+                       /*
+                        * don't update kvm_arm_hardware_enabled here
+                        * so that the hardware will be re-enabled
+                        * when we resume. See below.
+                        */
+                       cpu_hyp_reset();
+               return NOTIFY_OK;
+       case CPU_PM_ENTER_FAILED:
+       case CPU_PM_EXIT:
+               if (__this_cpu_read(kvm_arm_hardware_enabled))
+                       /* The hardware was enabled before suspend. */
+                       cpu_hyp_reinit();
+               return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
+       }
+ }
+ static struct notifier_block hyp_init_cpu_pm_nb = {
+       .notifier_call = hyp_init_cpu_pm_notifier,
+ };
+ static void __init hyp_cpu_pm_init(void)
+ {
+       cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
+ }
+ static void __init hyp_cpu_pm_exit(void)
+ {
+       cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
+ }
+ #else
+ static inline void hyp_cpu_pm_init(void)
+ {
+ }
+ static inline void hyp_cpu_pm_exit(void)
+ {
+ }
+ #endif
+ static int init_common_resources(void)
+ {
++      return kvm_set_ipa_limit();
+ }
+ static int init_subsystems(void)
+ {
+       int err = 0;
+       /*
+        * Enable hardware so that subsystem initialisation can access EL2.
+        */
+       on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
+       /*
+        * Register CPU lower-power notifier
+        */
+       hyp_cpu_pm_init();
+       /*
+        * Init HYP view of VGIC
+        */
+       err = kvm_vgic_hyp_init();
+       switch (err) {
+       case 0:
+               vgic_present = true;
+               break;
+       case -ENODEV:
+       case -ENXIO:
+               vgic_present = false;
+               err = 0;
+               break;
+       default:
+               goto out;
+       }
+       /*
+        * Init HYP architected timer support
+        */
+       err = kvm_timer_hyp_init(vgic_present);
+       if (err)
+               goto out;
+       kvm_perf_init();
+       kvm_coproc_table_init();
+ out:
+       on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
+       return err;
+ }
+ static void teardown_hyp_mode(void)
+ {
+       int cpu;
+       free_hyp_pgds();
+       for_each_possible_cpu(cpu)
+               free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+ }
+ /**
+  * Inits Hyp-mode on all online CPUs
+  */
+ static int init_hyp_mode(void)
+ {
+       int cpu;
+       int err = 0;
+       /*
+        * Allocate Hyp PGD and setup Hyp identity mapping
+        */
+       err = kvm_mmu_init();
+       if (err)
+               goto out_err;
+       /*
+        * Allocate stack pages for Hypervisor-mode
+        */
+       for_each_possible_cpu(cpu) {
+               unsigned long stack_page;
+               stack_page = __get_free_page(GFP_KERNEL);
+               if (!stack_page) {
+                       err = -ENOMEM;
+                       goto out_err;
+               }
+               per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
+       }
+       /*
+        * Map the Hyp-code called directly from the host
+        */
+       err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
+                                 kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
+       if (err) {
+               kvm_err("Cannot map world-switch code\n");
+               goto out_err;
+       }
+       err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
+                                 kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
+       if (err) {
+               kvm_err("Cannot map rodata section\n");
+               goto out_err;
+       }
+       err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
+                                 kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
+       if (err) {
+               kvm_err("Cannot map bss section\n");
+               goto out_err;
+       }
+       err = kvm_map_vectors();
+       if (err) {
+               kvm_err("Cannot map vectors\n");
+               goto out_err;
+       }
+       /*
+        * Map the Hyp stack pages
+        */
+       for_each_possible_cpu(cpu) {
+               char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
+               err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
+                                         PAGE_HYP);
+               if (err) {
+                       kvm_err("Cannot map hyp stack\n");
+                       goto out_err;
+               }
+       }
+       for_each_possible_cpu(cpu) {
+               kvm_host_data_t *cpu_data;
+               cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
+               err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);
+               if (err) {
+                       kvm_err("Cannot map host CPU state: %d\n", err);
+                       goto out_err;
+               }
+       }
+       err = hyp_map_aux_data();
+       if (err)
+               kvm_err("Cannot map host auxiliary data: %d\n", err);
+       return 0;
+ out_err:
+       teardown_hyp_mode();
+       kvm_err("error initializing Hyp mode: %d\n", err);
+       return err;
+ }
+ static void check_kvm_target_cpu(void *ret)
+ {
+       *(int *)ret = kvm_target_cpu();
+ }
+ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
+ {
+       struct kvm_vcpu *vcpu;
+       int i;
+       mpidr &= MPIDR_HWID_BITMASK;
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
+                       return vcpu;
+       }
+       return NULL;
+ }
+ bool kvm_arch_has_irq_bypass(void)
+ {
+       return true;
+ }
+ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+                                     struct irq_bypass_producer *prod)
+ {
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(cons, struct kvm_kernel_irqfd, consumer);
+       return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
+                                         &irqfd->irq_entry);
+ }
+ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+                                     struct irq_bypass_producer *prod)
+ {
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(cons, struct kvm_kernel_irqfd, consumer);
+       kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
+                                    &irqfd->irq_entry);
+ }
+ void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
+ {
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(cons, struct kvm_kernel_irqfd, consumer);
+       kvm_arm_halt_guest(irqfd->kvm);
+ }
+ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
+ {
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(cons, struct kvm_kernel_irqfd, consumer);
+       kvm_arm_resume_guest(irqfd->kvm);
+ }
+ /**
+  * Initialize Hyp-mode and memory mappings on all CPUs.
+  */
+ int kvm_arch_init(void *opaque)
+ {
+       int err;
+       int ret, cpu;
+       bool in_hyp_mode;
+       if (!is_hyp_mode_available()) {
+               kvm_info("HYP mode not available\n");
+               return -ENODEV;
+       }
+       in_hyp_mode = is_kernel_in_hyp_mode();
+       if (!in_hyp_mode && kvm_arch_requires_vhe()) {
+               kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n");
+               return -ENODEV;
+       }
+       for_each_online_cpu(cpu) {
+               smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
+               if (ret < 0) {
+                       kvm_err("Error, CPU %d not supported!\n", cpu);
+                       return -ENODEV;
+               }
+       }
+       err = init_common_resources();
+       if (err)
+               return err;
+       err = kvm_arm_init_sve();
+       if (err)
+               return err;
+       if (!in_hyp_mode) {
+               err = init_hyp_mode();
+               if (err)
+                       goto out_err;
+       }
+       err = init_subsystems();
+       if (err)
+               goto out_hyp;
+       if (in_hyp_mode)
+               kvm_info("VHE mode initialized successfully\n");
+       else
+               kvm_info("Hyp mode initialized successfully\n");
+       return 0;
+ out_hyp:
+       hyp_cpu_pm_exit();
+       if (!in_hyp_mode)
+               teardown_hyp_mode();
+ out_err:
+       return err;
+ }
+ /* NOP: Compiling as a module not supported */
+ void kvm_arch_exit(void)
+ {
+       kvm_perf_teardown();
+ }
+ static int arm_init(void)
+ {
+       int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+       return rc;
+ }
+ module_init(arm_init);
Simple merge
index 70cd7bcca4332537f053582097800284d1346134,865c8aa670bc2b9065e6fd4c962ee57bc48eea70..d3b2090237274f8ffe8f4d89de92074390d4075c
@@@ -36,16 -36,20 +36,12 @@@ static u32 kvm_ipa_limit
  /*
   * ARMv8 Reset Values
   */
- static const struct kvm_regs default_regs_reset = {
-       .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
-                       PSR_F_BIT | PSR_D_BIT),
- };
+ #define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
+                                PSR_F_BIT | PSR_D_BIT)
  
- static const struct kvm_regs default_regs_reset32 = {
-       .regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT |
-                       PSR_AA32_I_BIT | PSR_AA32_F_BIT),
- };
+ #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
+                                PSR_AA32_I_BIT | PSR_AA32_F_BIT)
  
 -static bool cpu_has_32bit_el1(void)
 -{
 -      u64 pfr0;
 -
 -      pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
 -      return !!(pfr0 & 0x20);
 -}
 -
  /**
   * kvm_arch_vm_ioctl_check_extension
   *
@@@ -280,11 -284,11 +276,11 @@@ int kvm_reset_vcpu(struct kvm_vcpu *vcp
        switch (vcpu->arch.target) {
        default:
                if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
 -                      if (!cpu_has_32bit_el1())
 +                      if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1))
                                goto out;
-                       cpu_reset = &default_regs_reset32;
+                       pstate = VCPU_RESET_PSTATE_SVC;
                } else {
-                       cpu_reset = &default_regs_reset;
+                       pstate = VCPU_RESET_PSTATE_EL1;
                }
  
                break;
Simple merge
index 0000000000000000000000000000000000000000,89a14ec8b33bb2009786d921759868943a152e24..d2339a2b9fb9c5a6ebdc9f6c48d399f8c49423bb
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,1063 +1,1063 @@@
 -       * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.txt
+ // SPDX-License-Identifier: GPL-2.0-only
+ /*
+  * VGICv3 MMIO handling functions
+  */
+ #include <linux/bitfield.h>
+ #include <linux/irqchip/arm-gic-v3.h>
+ #include <linux/kvm.h>
+ #include <linux/kvm_host.h>
+ #include <linux/interrupt.h>
+ #include <kvm/iodev.h>
+ #include <kvm/arm_vgic.h>
+ #include <asm/kvm_emulate.h>
+ #include <asm/kvm_arm.h>
+ #include <asm/kvm_mmu.h>
+ #include "vgic.h"
+ #include "vgic-mmio.h"
+ /* extract @num bytes at @offset bytes offset in data */
+ unsigned long extract_bytes(u64 data, unsigned int offset,
+                           unsigned int num)
+ {
+       return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0);
+ }
+ /* allows updates of any half of a 64-bit register (or the whole thing) */
+ u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len,
+                    unsigned long val)
+ {
+       int lower = (offset & 4) * 8;
+       int upper = lower + 8 * len - 1;
+       reg &= ~GENMASK_ULL(upper, lower);
+       val &= GENMASK_ULL(len * 8 - 1, 0);
+       return reg | ((u64)val << lower);
+ }
+ bool vgic_has_its(struct kvm *kvm)
+ {
+       struct vgic_dist *dist = &kvm->arch.vgic;
+       if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
+               return false;
+       return dist->has_its;
+ }
+ bool vgic_supports_direct_msis(struct kvm *kvm)
+ {
+       return (kvm_vgic_global_state.has_gicv4_1 ||
+               (kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm)));
+ }
+ /*
+  * The Revision field in the IIDR has the following meanings:
+  *
+  * Revision 2: Interrupt groups are guest-configurable and signaled using
+  *           their configured groups.
+  */
+ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
+                                           gpa_t addr, unsigned int len)
+ {
+       struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
+       u32 value = 0;
+       switch (addr & 0x0c) {
+       case GICD_CTLR:
+               if (vgic->enabled)
+                       value |= GICD_CTLR_ENABLE_SS_G1;
+               value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
+               if (vgic->nassgireq)
+                       value |= GICD_CTLR_nASSGIreq;
+               break;
+       case GICD_TYPER:
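+               /*
+                * GICD_TYPER.ITLinesNumber: total interrupts (SPIs plus the
+                * 32 private IRQs) divided by 32, minus one.
+                */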
+               value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
+               value = (value >> 5) - 1;
+               if (vgic_has_its(vcpu->kvm)) {
+                       value |= (INTERRUPT_ID_BITS_ITS - 1) << 19;
+                       value |= GICD_TYPER_LPIS;
+               } else {
+                       value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19;
+               }
+               break;
+       case GICD_TYPER2:
+               if (kvm_vgic_global_state.has_gicv4_1)
+                       value = GICD_TYPER2_nASSGIcap;
+               break;
+       case GICD_IIDR:
+               value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+                       (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
+                       (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
+               break;
+       default:
+               return 0;
+       }
+       return value;
+ }
+ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
+                                   gpa_t addr, unsigned int len,
+                                   unsigned long val)
+ {
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       switch (addr & 0x0c) {
+       case GICD_CTLR: {
+               bool was_enabled, is_hwsgi;
+               mutex_lock(&vcpu->kvm->lock);
+               was_enabled = dist->enabled;
+               is_hwsgi = dist->nassgireq;
+               dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
+               /* Not a GICv4.1? No HW SGIs */
+               if (!kvm_vgic_global_state.has_gicv4_1)
+                       val &= ~GICD_CTLR_nASSGIreq;
+               /* Dist stays enabled? nASSGIreq is RO */
+               if (was_enabled && dist->enabled) {
+                       val &= ~GICD_CTLR_nASSGIreq;
+                       val |= FIELD_PREP(GICD_CTLR_nASSGIreq, is_hwsgi);
+               }
+               /* Switching HW SGIs? */
+               dist->nassgireq = val & GICD_CTLR_nASSGIreq;
+               if (is_hwsgi != dist->nassgireq)
+                       vgic_v4_configure_vsgis(vcpu->kvm);
+               if (kvm_vgic_global_state.has_gicv4_1 &&
+                   was_enabled != dist->enabled)
+                       kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_RELOAD_GICv4);
+               else if (!was_enabled && dist->enabled)
+                       vgic_kick_vcpus(vcpu->kvm);
+               mutex_unlock(&vcpu->kvm->lock);
+               break;
+       }
+       case GICD_TYPER:
+       case GICD_TYPER2:
+       case GICD_IIDR:
+               /* This is at best for documentation purposes... */
+               return;
+       }
+ }
+ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
+                                          gpa_t addr, unsigned int len,
+                                          unsigned long val)
+ {
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       switch (addr & 0x0c) {
+       case GICD_TYPER2:
+       case GICD_IIDR:
+               if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
+                       return -EINVAL;
+               return 0;
+       case GICD_CTLR:
+               /* Not a GICv4.1? No HW SGIs */
+               if (!kvm_vgic_global_state.has_gicv4_1)
+                       val &= ~GICD_CTLR_nASSGIreq;
+               dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
+               dist->nassgireq = val & GICD_CTLR_nASSGIreq;
+               return 0;
+       }
+       vgic_mmio_write_v3_misc(vcpu, addr, len, val);
+       return 0;
+ }
+ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
+                                           gpa_t addr, unsigned int len)
+ {
+       int intid = VGIC_ADDR_TO_INTID(addr, 64);
+       struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
+       unsigned long ret = 0;
+       if (!irq)
+               return 0;
+       /* The upper word is RAZ for us. */
+       if (!(addr & 4))
+               ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
+       vgic_put_irq(vcpu->kvm, irq);
+       return ret;
+ }
+ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
+                                   gpa_t addr, unsigned int len,
+                                   unsigned long val)
+ {
+       int intid = VGIC_ADDR_TO_INTID(addr, 64);
+       struct vgic_irq *irq;
+       unsigned long flags;
+       /* The upper word is WI for us since we don't implement Aff3. */
+       if (addr & 4)
+               return;
+       irq = vgic_get_irq(vcpu->kvm, NULL, intid);
+       if (!irq)
+               return;
+       raw_spin_lock_irqsave(&irq->irq_lock, flags);
+       /* We only care about and preserve Aff0, Aff1 and Aff2. */
+       irq->mpidr = val & GENMASK(23, 0);
+       irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
+       raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+       vgic_put_irq(vcpu->kvm, irq);
+ }
+ static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu,
+                                            gpa_t addr, unsigned int len)
+ {
+       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       return vgic_cpu->lpis_enabled ? GICR_CTLR_ENABLE_LPIS : 0;
+ }
+ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
+                                    gpa_t addr, unsigned int len,
+                                    unsigned long val)
+ {
+       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       bool was_enabled = vgic_cpu->lpis_enabled;
+       if (!vgic_has_its(vcpu->kvm))
+               return;
+       vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS;
+       if (was_enabled && !vgic_cpu->lpis_enabled) {
+               vgic_flush_pending_lpis(vcpu);
+               vgic_its_invalidate_cache(vcpu->kvm);
+       }
+       if (!was_enabled && vgic_cpu->lpis_enabled)
+               vgic_enable_lpis(vcpu);
+ }
+ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
+                                             gpa_t addr, unsigned int len)
+ {
+       unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
+       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
+       int target_vcpu_id = vcpu->vcpu_id;
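+       /* GPA of the GICR_TYPER of the last redistributor mapped in this region */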
+       gpa_t last_rdist_typer = rdreg->base + GICR_TYPER +
+                       (rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE;
+       u64 value;
+       value = (u64)(mpidr & GENMASK(23, 0)) << 32;
+       value |= ((target_vcpu_id & 0xffff) << 8);
+       if (addr == last_rdist_typer)
+               value |= GICR_TYPER_LAST;
+       if (vgic_has_its(vcpu->kvm))
+               value |= GICR_TYPER_PLPIS;
+       return extract_bytes(value, addr & 7, len);
+ }
+ static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
+                                            gpa_t addr, unsigned int len)
+ {
+       return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+ }
+ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
+                                             gpa_t addr, unsigned int len)
+ {
+       switch (addr & 0xffff) {
+       case GICD_PIDR2:
+               /* report a GICv3 compliant implementation */
+               return 0x3b;
+       }
+       return 0;
+ }
+ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
+                                                 gpa_t addr, unsigned int len)
+ {
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       u32 value = 0;
+       int i;
+       /*
+        * The pending state of the interrupt is latched in the pending_latch
+        * variable. Userspace will save and restore the pending state and
+        * line_level separately.
++       * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
+        * for handling of ISPENDR and ICPENDR.
+        */
+       for (i = 0; i < len * 8; i++) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+               bool state = irq->pending_latch;
+               if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
+                       int err;
+                       err = irq_get_irqchip_state(irq->host_irq,
+                                                   IRQCHIP_STATE_PENDING,
+                                                   &state);
+                       WARN_ON(err);
+               }
+               if (state)
+                       value |= (1U << i);
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+       return value;
+ }
+ static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
+                                        gpa_t addr, unsigned int len,
+                                        unsigned long val)
+ {
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       int i;
+       unsigned long flags;
+       for (i = 0; i < len * 8; i++) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+               raw_spin_lock_irqsave(&irq->irq_lock, flags);
+               if (test_bit(i, &val)) {
+                       /*
+                        * pending_latch is set irrespective of irq type
+                        * (level or edge) to avoid a dependency on the VM
+                        * restoring the irq config before the pending info.
+                        */
+                       irq->pending_latch = true;
+                       vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+               } else {
+                       irq->pending_latch = false;
+                       raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+               }
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+       return 0;
+ }
+ /* We want to avoid outer shareable. */
+ u64 vgic_sanitise_shareability(u64 field)
+ {
+       switch (field) {
+       case GIC_BASER_OuterShareable:
+               return GIC_BASER_InnerShareable;
+       default:
+               return field;
+       }
+ }
+ /* Avoid any inner non-cacheable mapping. */
+ u64 vgic_sanitise_inner_cacheability(u64 field)
+ {
+       switch (field) {
+       case GIC_BASER_CACHE_nCnB:
+       case GIC_BASER_CACHE_nC:
+               return GIC_BASER_CACHE_RaWb;
+       default:
+               return field;
+       }
+ }
+ /* Non-cacheable or same-as-inner are OK. */
+ u64 vgic_sanitise_outer_cacheability(u64 field)
+ {
+       switch (field) {
+       case GIC_BASER_CACHE_SameAsInner:
+       case GIC_BASER_CACHE_nC:
+               return field;
+       default:
+               return GIC_BASER_CACHE_nC;
+       }
+ }
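+ /*
+  * Extract the field selected by @field_mask/@field_shift from @reg, run it
+  * through @sanitise_fn and merge the result back into @reg.
+  */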
+ u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift,
+                       u64 (*sanitise_fn)(u64))
+ {
+       u64 field = (reg & field_mask) >> field_shift;
+       field = sanitise_fn(field) << field_shift;
+       return (reg & ~field_mask) | field;
+ }
+ #define PROPBASER_RES0_MASK                                           \
+       (GENMASK_ULL(63, 59) | GENMASK_ULL(55, 52) | GENMASK_ULL(6, 5))
+ #define PENDBASER_RES0_MASK                                           \
+       (BIT_ULL(63) | GENMASK_ULL(61, 59) | GENMASK_ULL(55, 52) |      \
+        GENMASK_ULL(15, 12) | GENMASK_ULL(6, 0))
+ static u64 vgic_sanitise_pendbaser(u64 reg)
+ {
+       reg = vgic_sanitise_field(reg, GICR_PENDBASER_SHAREABILITY_MASK,
+                                 GICR_PENDBASER_SHAREABILITY_SHIFT,
+                                 vgic_sanitise_shareability);
+       reg = vgic_sanitise_field(reg, GICR_PENDBASER_INNER_CACHEABILITY_MASK,
+                                 GICR_PENDBASER_INNER_CACHEABILITY_SHIFT,
+                                 vgic_sanitise_inner_cacheability);
+       reg = vgic_sanitise_field(reg, GICR_PENDBASER_OUTER_CACHEABILITY_MASK,
+                                 GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT,
+                                 vgic_sanitise_outer_cacheability);
+       reg &= ~PENDBASER_RES0_MASK;
+       return reg;
+ }
+ static u64 vgic_sanitise_propbaser(u64 reg)
+ {
+       reg = vgic_sanitise_field(reg, GICR_PROPBASER_SHAREABILITY_MASK,
+                                 GICR_PROPBASER_SHAREABILITY_SHIFT,
+                                 vgic_sanitise_shareability);
+       reg = vgic_sanitise_field(reg, GICR_PROPBASER_INNER_CACHEABILITY_MASK,
+                                 GICR_PROPBASER_INNER_CACHEABILITY_SHIFT,
+                                 vgic_sanitise_inner_cacheability);
+       reg = vgic_sanitise_field(reg, GICR_PROPBASER_OUTER_CACHEABILITY_MASK,
+                                 GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT,
+                                 vgic_sanitise_outer_cacheability);
+       reg &= ~PROPBASER_RES0_MASK;
+       return reg;
+ }
+ static unsigned long vgic_mmio_read_propbase(struct kvm_vcpu *vcpu,
+                                            gpa_t addr, unsigned int len)
+ {
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       return extract_bytes(dist->propbaser, addr & 7, len);
+ }
+ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu,
+                                    gpa_t addr, unsigned int len,
+                                    unsigned long val)
+ {
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       u64 old_propbaser, propbaser;
+       /* Storing a value with LPIs already enabled is undefined */
+       if (vgic_cpu->lpis_enabled)
+               return;
+       do {
+               old_propbaser = READ_ONCE(dist->propbaser);
+               propbaser = old_propbaser;
+               propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
+               propbaser = vgic_sanitise_propbaser(propbaser);
+       } while (cmpxchg64(&dist->propbaser, old_propbaser,
+                          propbaser) != old_propbaser);
+ }
+ static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu,
+                                            gpa_t addr, unsigned int len)
+ {
+       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       u64 value = vgic_cpu->pendbaser;
+       value &= ~GICR_PENDBASER_PTZ;
+       return extract_bytes(value, addr & 7, len);
+ }
+ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
+                                    gpa_t addr, unsigned int len,
+                                    unsigned long val)
+ {
+       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       u64 old_pendbaser, pendbaser;
+       /* Storing a value with LPIs already enabled is undefined */
+       if (vgic_cpu->lpis_enabled)
+               return;
+       do {
+               old_pendbaser = READ_ONCE(vgic_cpu->pendbaser);
+               pendbaser = old_pendbaser;
+               pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
+               pendbaser = vgic_sanitise_pendbaser(pendbaser);
+       } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser,
+                          pendbaser) != old_pendbaser);
+ }
+ /*
+  * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
+  * redistributors, while SPIs are covered by registers in the distributor
+  * block. Trying to set private IRQs in this block gets ignored.
+  * We take some special care here to fix the calculation of the register
+  * offset.
+  */
+ #define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, ur, uw, bpi, acc) \
+       {                                                               \
+               .reg_offset = off,                                      \
+               .bits_per_irq = bpi,                                    \
+               .len = (bpi * VGIC_NR_PRIVATE_IRQS) / 8,                \
+               .access_flags = acc,                                    \
+               .read = vgic_mmio_read_raz,                             \
+               .write = vgic_mmio_write_wi,                            \
+       }, {                                                            \
+               .reg_offset = off + (bpi * VGIC_NR_PRIVATE_IRQS) / 8,   \
+               .bits_per_irq = bpi,                                    \
+               .len = (bpi * (1024 - VGIC_NR_PRIVATE_IRQS)) / 8,       \
+               .access_flags = acc,                                    \
+               .read = rd,                                             \
+               .write = wr,                                            \
+               .uaccess_read = ur,                                     \
+               .uaccess_write = uw,                                    \
+       }
+ static const struct vgic_register_region vgic_v3_dist_registers[] = {
+       REGISTER_DESC_WITH_LENGTH_UACCESS(GICD_CTLR,
+               vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc,
+               NULL, vgic_mmio_uaccess_write_v3_misc,
+               16, VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(GICD_STATUSR,
+               vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR,
+               vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
+               vgic_mmio_read_enable, vgic_mmio_write_senable,
+               NULL, vgic_uaccess_write_senable, 1,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER,
+               vgic_mmio_read_enable, vgic_mmio_write_cenable,
+              NULL, vgic_uaccess_write_cenable, 1,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
+               vgic_mmio_read_pending, vgic_mmio_write_spending,
+               vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
+               vgic_mmio_read_pending, vgic_mmio_write_cpending,
+               vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 1,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
+               vgic_mmio_read_active, vgic_mmio_write_sactive,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
+               vgic_mmio_read_active, vgic_mmio_write_cactive,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive,
+               1, VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
+               vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
+               8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR,
+               vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 8,
+               VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR,
+               vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR,
+               vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER,
+               vgic_mmio_read_irouter, vgic_mmio_write_irouter, NULL, NULL, 64,
+               VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(GICD_IDREGS,
+               vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
+               VGIC_ACCESS_32bit),
+ };
+ static const struct vgic_register_region vgic_v3_rd_registers[] = {
+       /* RD_base registers */
+       REGISTER_DESC_WITH_LENGTH(GICR_CTLR,
+               vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(GICR_STATUSR,
+               vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(GICR_IIDR,
+               vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(GICR_TYPER,
+               vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8,
+               VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
+               vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER,
+               vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8,
+               VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER,
+               vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8,
+               VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
+               vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
+               VGIC_ACCESS_32bit),
+       /* SGI_base registers */
+       REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0,
+               vgic_mmio_read_group, vgic_mmio_write_group, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISENABLER0,
+               vgic_mmio_read_enable, vgic_mmio_write_senable,
+               NULL, vgic_uaccess_write_senable, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICENABLER0,
+               vgic_mmio_read_enable, vgic_mmio_write_cenable,
+               NULL, vgic_uaccess_write_cenable, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
+               vgic_mmio_read_pending, vgic_mmio_write_spending,
+               vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
+               vgic_mmio_read_pending, vgic_mmio_write_cpending,
+               vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0,
+               vgic_mmio_read_active, vgic_mmio_write_sactive,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0,
+               vgic_mmio_read_active, vgic_mmio_write_cactive,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0,
+               vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
+               VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+       REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICFGR0,
+               vgic_mmio_read_config, vgic_mmio_write_config, 8,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGRPMODR0,
+               vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR,
+               vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+               VGIC_ACCESS_32bit),
+ };
+ unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev)
+ {
+       dev->regions = vgic_v3_dist_registers;
+       dev->nr_regions = ARRAY_SIZE(vgic_v3_dist_registers);
+       kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
+       return SZ_64K;
+ }
+ /**
+  * vgic_register_redist_iodev - register a single redist iodev
+  * @vcpu:    The VCPU to which the redistributor belongs
+  *
+  * Register a KVM iodev for this VCPU's redistributor using the address
+  * provided.
+  *
+  * Return 0 on success, -ERRNO otherwise.
+  */
+ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
+ {
+       struct kvm *kvm = vcpu->kvm;
+       struct vgic_dist *vgic = &kvm->arch.vgic;
+       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
+       struct vgic_redist_region *rdreg;
+       gpa_t rd_base;
+       int ret;
+       if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr))
+               return 0;
+       /*
+        * We may be creating VCPUs before having set the base address for the
+        * redistributor region, in which case we will come back to this
+        * function for all VCPUs when the base address is set.  Just return
+        * without doing any work for now.
+        */
+       rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions);
+       if (!rdreg)
+               return 0;
+       if (!vgic_v3_check_base(kvm))
+               return -EINVAL;
+       vgic_cpu->rdreg = rdreg;
+       rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
+       kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
+       rd_dev->base_addr = rd_base;
+       rd_dev->iodev_type = IODEV_REDIST;
+       rd_dev->regions = vgic_v3_rd_registers;
+       rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
+       rd_dev->redist_vcpu = vcpu;
+       mutex_lock(&kvm->slots_lock);
+       ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
+                                     2 * SZ_64K, &rd_dev->dev);
+       mutex_unlock(&kvm->slots_lock);
+       if (ret)
+               return ret;
+       rdreg->free_index++;
+       return 0;
+ }
+ static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
+ {
+       struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
+       kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev);
+ }
+ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
+ {
+       struct kvm_vcpu *vcpu;
+       int c, ret = 0;
+       kvm_for_each_vcpu(c, vcpu, kvm) {
+               ret = vgic_register_redist_iodev(vcpu);
+               if (ret)
+                       break;
+       }
+       if (ret) {
+               /* The current c failed, so we start with the previous one. */
+               mutex_lock(&kvm->slots_lock);
+               for (c--; c >= 0; c--) {
+                       vcpu = kvm_get_vcpu(kvm, c);
+                       vgic_unregister_redist_iodev(vcpu);
+               }
+               mutex_unlock(&kvm->slots_lock);
+       }
+       return ret;
+ }
+ /**
+  * vgic_v3_insert_redist_region - Insert a new redistributor region
+  *
+  * Performs various checks before inserting the rdist region in the list.
+  * Those tests depend on whether the size of the rdist region is known
+  * (i.e. count != 0). The list is sorted by rdist region index.
+  *
+  * @kvm: kvm handle
+  * @index: redist region index
+  * @base: base of the new rdist region
+  * @count: number of redistributors the region is made of (0 for the old-style
+  * single region, whose size is inferred from the number of vcpus)
+  *
+  * Return 0 on success, < 0 otherwise
+  */
+ static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
+                                       gpa_t base, uint32_t count)
+ {
+       struct vgic_dist *d = &kvm->arch.vgic;
+       struct vgic_redist_region *rdreg;
+       struct list_head *rd_regions = &d->rd_regions;
+       size_t size = count * KVM_VGIC_V3_REDIST_SIZE;
+       int ret;
+       /* single rdist region already set? */
+       if (!count && !list_empty(rd_regions))
+               return -EINVAL;
+       /* cross the end of memory? */
+       if (base + size < base)
+               return -EINVAL;
+       if (list_empty(rd_regions)) {
+               if (index != 0)
+                       return -EINVAL;
+       } else {
+               rdreg = list_last_entry(rd_regions,
+                                       struct vgic_redist_region, list);
+               if (index != rdreg->index + 1)
+                       return -EINVAL;
+               /* Cannot add an explicitly sized region after the legacy region */
+               if (!rdreg->count)
+                       return -EINVAL;
+       }
+       /*
+        * For legacy single-region redistributor regions (!count),
+        * check that the redistributor region does not overlap with the
+        * distributor's address space.
+        */
+       if (!count && !IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
+               vgic_dist_overlap(kvm, base, size))
+               return -EINVAL;
+       /* collision with any other rdist region? */
+       if (vgic_v3_rdist_overlap(kvm, base, size))
+               return -EINVAL;
+       rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL);
+       if (!rdreg)
+               return -ENOMEM;
+       rdreg->base = VGIC_ADDR_UNDEF;
+       ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K);
+       if (ret)
+               goto free;
+       rdreg->base = base;
+       rdreg->count = count;
+       rdreg->free_index = 0;
+       rdreg->index = index;
+       list_add_tail(&rdreg->list, rd_regions);
+       return 0;
+ free:
+       kfree(rdreg);
+       return ret;
+ }
+ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
+ {
+       int ret;
+       ret = vgic_v3_insert_redist_region(kvm, index, addr, count);
+       if (ret)
+               return ret;
+       /*
+        * Register iodevs for each existing VCPU.  Adding more VCPUs
+        * afterwards will register the iodevs when needed.
+        */
+       ret = vgic_register_all_redist_iodevs(kvm);
+       if (ret)
+               return ret;
+       return 0;
+ }
+ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
+ {
+       const struct vgic_register_region *region;
+       struct vgic_io_device iodev;
+       struct vgic_reg_attr reg_attr;
+       struct kvm_vcpu *vcpu;
+       gpa_t addr;
+       int ret;
+       ret = vgic_v3_parse_attr(dev, attr, &reg_attr);
+       if (ret)
+               return ret;
+       vcpu = reg_attr.vcpu;
+       addr = reg_attr.addr;
+       switch (attr->group) {
+       case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+               iodev.regions = vgic_v3_dist_registers;
+               iodev.nr_regions = ARRAY_SIZE(vgic_v3_dist_registers);
+               iodev.base_addr = 0;
+               break;
+       case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{
+               iodev.regions = vgic_v3_rd_registers;
+               iodev.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
+               iodev.base_addr = 0;
+               break;
+       }
+       case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
+               u64 reg, id;
+               id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK);
+               return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, &reg);
+       }
+       default:
+               return -ENXIO;
+       }
+       /* We only support aligned 32-bit accesses. */
+       if (addr & 3)
+               return -ENXIO;
+       region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32));
+       if (!region)
+               return -ENXIO;
+       return 0;
+ }
+ /*
+  * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI
+  * generation register ICC_SGI1R_EL1) with a given VCPU.
+  * If the VCPU's MPIDR matches, return the level0 affinity, otherwise
+  * return -1.
+  */
+ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
+ {
+       unsigned long affinity;
+       int level0;
+       /*
+        * Split the current VCPU's MPIDR into affinity level 0 and the
+        * rest as this is what we have to compare against.
+        */
+       affinity = kvm_vcpu_get_mpidr_aff(vcpu);
+       level0 = MPIDR_AFFINITY_LEVEL(affinity, 0);
+       affinity &= ~MPIDR_LEVEL_MASK;
+       /* bail out if the upper three levels don't match */
+       if (sgi_aff != affinity)
+               return -1;
+       /* Is this VCPU's bit set in the mask? */
+       if (!(sgi_cpu_mask & BIT(level0)))
+               return -1;
+       return level0;
+ }
+ /*
+  * The ICC_SGI* registers encode the affinity differently from the MPIDR,
+  * so provide a wrapper to use the existing defines to isolate a certain
+  * affinity level.
+  */
+ #define SGI_AFFINITY_LEVEL(reg, level) \
+       ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \
+       >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
+ /**
+  * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
+  * @vcpu: The VCPU requesting a SGI
+  * @reg: The value written into ICC_{ASGI1,SGI0,SGI1}R by that VCPU
+  * @allow_group1: Does the sysreg access allow generation of G1 SGIs
+  *
+  * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register.
+  * This will trap in sys_regs.c and call this function.
+  * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the
+  * target processors as well as a bitmask of 16 Aff0 CPUs.
+  * If the interrupt routing mode bit is not set, we iterate over all VCPUs to
+  * check for matching ones. If this bit is set, we signal all, but not the
+  * calling VCPU.
+  */
+ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
+ {
+       struct kvm *kvm = vcpu->kvm;
+       struct kvm_vcpu *c_vcpu;
+       u16 target_cpus;
+       u64 mpidr;
+       int sgi, c;
+       int vcpu_id = vcpu->vcpu_id;
+       bool broadcast;
+       unsigned long flags;
+       sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
+       broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
+       target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
+       mpidr = SGI_AFFINITY_LEVEL(reg, 3);
+       mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
+       mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
+       /*
+        * We iterate over all VCPUs to find the MPIDRs matching the request.
+        * If we have handled one CPU, we clear its bit to detect early
+        * if we are already finished. This avoids iterating through all
+        * VCPUs when most of the time we just signal a single VCPU.
+        */
+       kvm_for_each_vcpu(c, c_vcpu, kvm) {
+               struct vgic_irq *irq;
+               /* Exit early if we have dealt with all requested CPUs */
+               if (!broadcast && target_cpus == 0)
+                       break;
+               /* Don't signal the calling VCPU */
+               if (broadcast && c == vcpu_id)
+                       continue;
+               if (!broadcast) {
+                       int level0;
+                       level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
+                       if (level0 == -1)
+                               continue;
+                       /* remove this matching VCPU from the mask */
+                       target_cpus &= ~BIT(level0);
+               }
+               irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
+               raw_spin_lock_irqsave(&irq->irq_lock, flags);
+               /*
+                * An access targeting Group0 SGIs can only generate
+                * those, while an access targeting Group1 SGIs can
+                * generate interrupts of either group.
+                */
+               if (!irq->group || allow_group1) {
+                       if (!irq->hw) {
+                               irq->pending_latch = true;
+                               vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+                       } else {
+                               /* HW SGI? Ask the GIC to inject it */
+                               int err;
+                               err = irq_set_irqchip_state(irq->host_irq,
+                                                           IRQCHIP_STATE_PENDING,
+                                                           true);
+                               WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
+                               raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+                       }
+               } else {
+                       raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+               }
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+ }
+ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+                        int offset, u32 *val)
+ {
+       struct vgic_io_device dev = {
+               .regions = vgic_v3_dist_registers,
+               .nr_regions = ARRAY_SIZE(vgic_v3_dist_registers),
+       };
+       return vgic_uaccess(vcpu, &dev, is_write, offset, val);
+ }
+ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+                          int offset, u32 *val)
+ {
+       struct vgic_io_device rd_dev = {
+               .regions = vgic_v3_rd_registers,
+               .nr_regions = ARRAY_SIZE(vgic_v3_rd_registers),
+       };
+       return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
+ }
+ int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+                                   u32 intid, u64 *val)
+ {
+       if (intid % 32)
+               return -EINVAL;
+       if (is_write)
+               vgic_write_irq_line_level_info(vcpu, intid, *val);
+       else
+               *val = vgic_read_irq_line_level_info(vcpu, intid);
+       return 0;
+ }
index 0000000000000000000000000000000000000000,769e4802645ee8ef30f3761ba7c0ddc73bf652b2..64fcd75111108c6b7b3b5b74e434339138c44a57
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,321 +1,321 @@@
 - * As per Documentation/virt/kvm/devices/arm-vgic-v3.txt,
+ /* SPDX-License-Identifier: GPL-2.0-only */
+ /*
+  * Copyright (C) 2015, 2016 ARM Ltd.
+  */
+ #ifndef __KVM_ARM_VGIC_NEW_H__
+ #define __KVM_ARM_VGIC_NEW_H__
+ #include <linux/irqchip/arm-gic-common.h>
+ #define PRODUCT_ID_KVM                0x4b    /* ASCII code K */
+ #define IMPLEMENTER_ARM               0x43b
+ #define VGIC_ADDR_UNDEF               (-1)
+ #define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
+ #define INTERRUPT_ID_BITS_SPIS        10
+ #define INTERRUPT_ID_BITS_ITS 16
+ #define VGIC_PRI_BITS         5
+ #define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
+ #define VGIC_AFFINITY_0_SHIFT 0
+ #define VGIC_AFFINITY_0_MASK (0xffUL << VGIC_AFFINITY_0_SHIFT)
+ #define VGIC_AFFINITY_1_SHIFT 8
+ #define VGIC_AFFINITY_1_MASK (0xffUL << VGIC_AFFINITY_1_SHIFT)
+ #define VGIC_AFFINITY_2_SHIFT 16
+ #define VGIC_AFFINITY_2_MASK (0xffUL << VGIC_AFFINITY_2_SHIFT)
+ #define VGIC_AFFINITY_3_SHIFT 24
+ #define VGIC_AFFINITY_3_MASK (0xffUL << VGIC_AFFINITY_3_SHIFT)
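+ /*
+  * Isolate affinity level @level from the userspace encoding and shift it
+  * into its MPIDR position.
+  */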
+ #define VGIC_AFFINITY_LEVEL(reg, level) \
+       ((((reg) & VGIC_AFFINITY_## level ##_MASK) \
+       >> VGIC_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
+ /*
+  * Userspace encodes the affinity differently from the MPIDR.
+  * The macro below converts the vgic userspace format to the MPIDR reg format.
+  */
+ #define VGIC_TO_MPIDR(val) (VGIC_AFFINITY_LEVEL(val, 0) | \
+                           VGIC_AFFINITY_LEVEL(val, 1) | \
+                           VGIC_AFFINITY_LEVEL(val, 2) | \
+                           VGIC_AFFINITY_LEVEL(val, 3))
+ /*
 - * As per Documentation/virt/kvm/devices/arm-vgic-its.txt,
++ * As per Documentation/virt/kvm/devices/arm-vgic-v3.rst,
+  * below macros are defined for CPUREG encoding.
+  */
+ #define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK   0x000000000000c000
+ #define KVM_REG_ARM_VGIC_SYSREG_OP0_SHIFT  14
+ #define KVM_REG_ARM_VGIC_SYSREG_OP1_MASK   0x0000000000003800
+ #define KVM_REG_ARM_VGIC_SYSREG_OP1_SHIFT  11
+ #define KVM_REG_ARM_VGIC_SYSREG_CRN_MASK   0x0000000000000780
+ #define KVM_REG_ARM_VGIC_SYSREG_CRN_SHIFT  7
+ #define KVM_REG_ARM_VGIC_SYSREG_CRM_MASK   0x0000000000000078
+ #define KVM_REG_ARM_VGIC_SYSREG_CRM_SHIFT  3
+ #define KVM_REG_ARM_VGIC_SYSREG_OP2_MASK   0x0000000000000007
+ #define KVM_REG_ARM_VGIC_SYSREG_OP2_SHIFT  0
+ #define KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM_VGIC_SYSREG_OP0_MASK | \
+                                     KVM_REG_ARM_VGIC_SYSREG_OP1_MASK | \
+                                     KVM_REG_ARM_VGIC_SYSREG_CRN_MASK | \
+                                     KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \
+                                     KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
+ /*
++ * As per Documentation/virt/kvm/devices/arm-vgic-its.rst,
+  * below macros are defined for ITS table entry encoding.
+  */
+ #define KVM_ITS_CTE_VALID_SHIFT               63
+ #define KVM_ITS_CTE_VALID_MASK                BIT_ULL(63)
+ #define KVM_ITS_CTE_RDBASE_SHIFT      16
+ #define KVM_ITS_CTE_ICID_MASK         GENMASK_ULL(15, 0)
+ #define KVM_ITS_ITE_NEXT_SHIFT                48
+ #define KVM_ITS_ITE_PINTID_SHIFT      16
+ #define KVM_ITS_ITE_PINTID_MASK               GENMASK_ULL(47, 16)
+ #define KVM_ITS_ITE_ICID_MASK         GENMASK_ULL(15, 0)
+ #define KVM_ITS_DTE_VALID_SHIFT               63
+ #define KVM_ITS_DTE_VALID_MASK                BIT_ULL(63)
+ #define KVM_ITS_DTE_NEXT_SHIFT                49
+ #define KVM_ITS_DTE_NEXT_MASK         GENMASK_ULL(62, 49)
+ #define KVM_ITS_DTE_ITTADDR_SHIFT     5
+ #define KVM_ITS_DTE_ITTADDR_MASK      GENMASK_ULL(48, 5)
+ #define KVM_ITS_DTE_SIZE_MASK         GENMASK_ULL(4, 0)
+ #define KVM_ITS_L1E_VALID_MASK                BIT_ULL(63)
+ /* we only support 64 kB translation table page size */
+ #define KVM_ITS_L1E_ADDR_MASK         GENMASK_ULL(51, 16)
+ #define KVM_VGIC_V3_RDIST_INDEX_MASK  GENMASK_ULL(11, 0)
+ #define KVM_VGIC_V3_RDIST_FLAGS_MASK  GENMASK_ULL(15, 12)
+ #define KVM_VGIC_V3_RDIST_FLAGS_SHIFT 12
+ #define KVM_VGIC_V3_RDIST_BASE_MASK   GENMASK_ULL(51, 16)
+ #define KVM_VGIC_V3_RDIST_COUNT_MASK  GENMASK_ULL(63, 52)
+ #define KVM_VGIC_V3_RDIST_COUNT_SHIFT 52
+ #ifdef CONFIG_DEBUG_SPINLOCK
+ #define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
+ #else
+ #define DEBUG_SPINLOCK_BUG_ON(p)
+ #endif
+ /* Requires the irq_lock to be held by the caller. */
+ static inline bool irq_is_pending(struct vgic_irq *irq)
+ {
+       if (irq->config == VGIC_CONFIG_EDGE)
+               return irq->pending_latch;
+       else
+               return irq->pending_latch || irq->line_level;
+ }
+ static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq)
+ {
+       return irq->config == VGIC_CONFIG_LEVEL && irq->hw;
+ }
+ static inline int vgic_irq_get_lr_count(struct vgic_irq *irq)
+ {
+       /* Account for the active state as an interrupt */
+       if (vgic_irq_is_sgi(irq->intid) && irq->source)
+               return hweight8(irq->source) + irq->active;
+       return irq_is_pending(irq) || irq->active;
+ }
+ static inline bool vgic_irq_is_multi_sgi(struct vgic_irq *irq)
+ {
+       return vgic_irq_get_lr_count(irq) > 1;
+ }
+ /*
+  * This struct provides an intermediate representation of the fields contained
+  * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
+  * state to userspace can generate either GICv2 or GICv3 CPU interface
+  * registers regardless of the hardware-backed GIC used.
+  */
+ struct vgic_vmcr {
+       u32     grpen0;
+       u32     grpen1;
+       u32     ackctl;
+       u32     fiqen;
+       u32     cbpr;
+       u32     eoim;
+       u32     abpr;
+       u32     bpr;
+       u32     pmr;  /* Priority mask field in the GICC_PMR and
+                      * ICC_PMR_EL1 priority field format */
+ };
+ struct vgic_reg_attr {
+       struct kvm_vcpu *vcpu;
+       gpa_t addr;
+ };
+ int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+                      struct vgic_reg_attr *reg_attr);
+ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+                      struct vgic_reg_attr *reg_attr);
+ const struct vgic_register_region *
+ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
+                    gpa_t addr, int len);
+ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
+                             u32 intid);
+ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq);
+ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
+ bool vgic_get_phys_line_level(struct vgic_irq *irq);
+ void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
+ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
+ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
+                          unsigned long flags);
+ void vgic_kick_vcpus(struct kvm *kvm);
+ int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
+                     phys_addr_t addr, phys_addr_t alignment);
+ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
+ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
+ void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
+ void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
+ void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
+ int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
+ int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+                        int offset, u32 *val);
+ int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+                         int offset, u32 *val);
+ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+ void vgic_v2_enable(struct kvm_vcpu *vcpu);
+ int vgic_v2_probe(const struct gic_kvm_info *info);
+ int vgic_v2_map_resources(struct kvm *kvm);
+ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
+                            enum vgic_type);
+ void vgic_v2_init_lrs(void);
+ void vgic_v2_load(struct kvm_vcpu *vcpu);
+ void vgic_v2_put(struct kvm_vcpu *vcpu);
+ void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
+ void vgic_v2_save_state(struct kvm_vcpu *vcpu);
+ void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
+ static inline void vgic_get_irq_kref(struct vgic_irq *irq)
+ {
+       if (irq->intid < VGIC_MIN_LPI)
+               return;
+       kref_get(&irq->refcount);
+ }
+ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
+ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
+ void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
+ void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
+ void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
+ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+ void vgic_v3_enable(struct kvm_vcpu *vcpu);
+ int vgic_v3_probe(const struct gic_kvm_info *info);
+ int vgic_v3_map_resources(struct kvm *kvm);
+ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
+ int vgic_v3_save_pending_tables(struct kvm *kvm);
+ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count);
+ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu);
+ bool vgic_v3_check_base(struct kvm *kvm);
+ void vgic_v3_load(struct kvm_vcpu *vcpu);
+ void vgic_v3_put(struct kvm_vcpu *vcpu);
+ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu);
+ bool vgic_has_its(struct kvm *kvm);
+ int kvm_vgic_register_its_device(void);
+ void vgic_enable_lpis(struct kvm_vcpu *vcpu);
+ void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu);
+ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
+ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
+ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+                        int offset, u32 *val);
+ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+                        int offset, u32 *val);
+ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+                        u64 id, u64 *val);
+ int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
+                               u64 *reg);
+ int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+                                   u32 intid, u64 *val);
+ int kvm_register_vgic_device(unsigned long type);
+ void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+ void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+ int vgic_lazy_init(struct kvm *kvm);
+ int vgic_init(struct kvm *kvm);
+ void vgic_debug_init(struct kvm *kvm);
+ void vgic_debug_destroy(struct kvm *kvm);
+ bool lock_all_vcpus(struct kvm *kvm);
+ void unlock_all_vcpus(struct kvm *kvm);
+ static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
+ {
+       struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
+       /*
+        * num_pri_bits is initialized with the HW-supported value.
+        * We can safely rely on num_pri_bits even if the VM has not
+        * restored ICC_CTLR_EL1 before restoring the APnR registers.
+        */
+       switch (cpu_if->num_pri_bits) {
+       case 7: return 3;
+       case 6: return 1;
+       default: return 0;
+       }
+ }
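+ /*
+  * A legacy region (count == 0) sizes itself from the number of vCPUs and
+  * is never reported as full.
+  */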
+ static inline bool
+ vgic_v3_redist_region_full(struct vgic_redist_region *region)
+ {
+       if (!region->count)
+               return false;
+       return (region->free_index >= region->count);
+ }
+ struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rdregs);
+ static inline size_t
+ vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg)
+ {
+       if (!rdreg->count)
+               return atomic_read(&kvm->online_vcpus) * KVM_VGIC_V3_REDIST_SIZE;
+       else
+               return rdreg->count * KVM_VGIC_V3_REDIST_SIZE;
+ }
+ struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
+                                                          u32 index);
+ bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size);
+ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
+ {
+       struct vgic_dist *d = &kvm->arch.vgic;
+       return (base + size > d->vgic_dist_base) &&
+               (base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE);
+ }
+ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
+ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
+                        u32 devid, u32 eventid, struct vgic_irq **irq);
+ struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
+ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi);
+ void vgic_lpi_translation_cache_init(struct kvm *kvm);
+ void vgic_lpi_translation_cache_destroy(struct kvm *kvm);
+ void vgic_its_invalidate_cache(struct kvm *kvm);
+ bool vgic_supports_direct_msis(struct kvm *kvm);
+ int vgic_v4_init(struct kvm *kvm);
+ void vgic_v4_teardown(struct kvm *kvm);
+ void vgic_v4_configure_vsgis(struct kvm *kvm);
+ #endif
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 6bc6d7613f76d4b5042febd5d2da4ca3ec54a3ee,4e1695db788a7a3ac650368990b3e3bec9ac74ab..238b78e069fe24365339537f94067eed000f8f1e
@@@ -1860,6 -2017,10 +2017,10 @@@ int kvm_vcpu_ioctl_get_hv_cpuid(struct 
                        ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
                        ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
  
 -                      ent->ebx |= HV_X64_DEBUGGING;
++                      ent->ebx |= HV_DEBUGGING;
+                       ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE;
+                       ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
                        /*
                         * Direct Synthetic timers only make sense with in-kernel
                         * LAPIC
Simple merge
Simple merge
index 262fae9526b1f8549f81eeb705d12be4ddc24a2f,0000000000000000000000000000000000000000..e73a11850055c4de93b69c7e1d5559d779243232
mode 100644,000000..100644
--- /dev/null
@@@ -1,493 -1,0 +1,497 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
 +
 +/*
 + * This file contains definitions from Hyper-V Hypervisor Top-Level Functional
 + * Specification (TLFS):
 + * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
 + */
 +
 +#ifndef _ASM_GENERIC_HYPERV_TLFS_H
 +#define _ASM_GENERIC_HYPERV_TLFS_H
 +
 +#include <linux/types.h>
 +#include <linux/bits.h>
 +#include <linux/time64.h>
 +
 +/*
 + * While not explicitly listed in the TLFS, Hyper-V always runs with a page size
 + * of 4096. These definitions are used when communicating with Hyper-V using
 + * guest physical pages and guest physical page addresses, since the guest page
 + * size may not be 4096 on all architectures.
 + */
 +#define HV_HYP_PAGE_SHIFT      12
 +#define HV_HYP_PAGE_SIZE       BIT(HV_HYP_PAGE_SHIFT)
 +#define HV_HYP_PAGE_MASK       (~(HV_HYP_PAGE_SIZE - 1))
 +
 +/*
 + * Hyper-V provides two categories of flags relevant to guest VMs.  The
 + * "Features" category indicates specific functionality that is available
 + * to guests on this particular instance of Hyper-V. The "Features"
 + * are presented in four groups, each of which is 32 bits. The group A
 + * and B definitions are common across architectures and are listed here.
 + * However, not all flags are relevant on all architectures.
 + *
 + * Groups C and D vary across architectures and are listed in the
 + * architecture specific portion of hyperv-tlfs.h. Some of these flags exist
 + * on multiple architectures, but the bit positions are different so they
 + * cannot appear in the generic portion of hyperv-tlfs.h.
 + *
 + * The "Enlightenments" category provides recommendations on whether to use
 + * specific enlightenments that are available. The Enlightenments are a single
 + * group of 32 bits, but they vary across architectures and are listed in
 + * the architecture specific portion of hyperv-tlfs.h.
 + */
 +
 +/*
 + * Group A Features.
 + */
 +
 +/* VP Runtime register available */
 +#define HV_MSR_VP_RUNTIME_AVAILABLE           BIT(0)
 +/* Partition Reference Counter available */
 +#define HV_MSR_TIME_REF_COUNT_AVAILABLE               BIT(1)
 +/* Basic SynIC register available */
 +#define HV_MSR_SYNIC_AVAILABLE                        BIT(2)
 +/* Synthetic Timer registers available */
 +#define HV_MSR_SYNTIMER_AVAILABLE             BIT(3)
 +/* Virtual APIC assist and VP assist page registers available */
 +#define HV_MSR_APIC_ACCESS_AVAILABLE          BIT(4)
 +/* Hypercall and Guest OS ID registers available */
 +#define HV_MSR_HYPERCALL_AVAILABLE            BIT(5)
 +/* Access virtual processor index register available */
 +#define HV_MSR_VP_INDEX_AVAILABLE             BIT(6)
 +/* Virtual system reset register available */
 +#define HV_MSR_RESET_AVAILABLE                        BIT(7)
 +/* Access statistics page registers available */
 +#define HV_MSR_STAT_PAGES_AVAILABLE           BIT(8)
 +/* Partition reference TSC register is available */
 +#define HV_MSR_REFERENCE_TSC_AVAILABLE                BIT(9)
 +/* Partition Guest IDLE register is available */
 +#define HV_MSR_GUEST_IDLE_AVAILABLE           BIT(10)
 +/* Partition local APIC and TSC frequency registers available */
 +#define HV_ACCESS_FREQUENCY_MSRS              BIT(11)
 +/* AccessReenlightenmentControls privilege */
 +#define HV_ACCESS_REENLIGHTENMENT             BIT(13)
 +/* AccessTscInvariantControls privilege */
 +#define HV_ACCESS_TSC_INVARIANT                       BIT(15)
 +
 +/*
 + * Group B features.
 + */
 +#define HV_CREATE_PARTITIONS                  BIT(0)
 +#define HV_ACCESS_PARTITION_ID                        BIT(1)
 +#define HV_ACCESS_MEMORY_POOL                 BIT(2)
 +#define HV_ADJUST_MESSAGE_BUFFERS             BIT(3)
 +#define HV_POST_MESSAGES                      BIT(4)
 +#define HV_SIGNAL_EVENTS                      BIT(5)
 +#define HV_CREATE_PORT                                BIT(6)
 +#define HV_CONNECT_PORT                               BIT(7)
 +#define HV_ACCESS_STATS                               BIT(8)
 +#define HV_DEBUGGING                          BIT(11)
 +#define HV_CPU_POWER_MANAGEMENT                       BIT(12)
 +
 +
 +/*
 + * TSC page layout.
 + */
 +struct ms_hyperv_tsc_page {
 +      volatile u32 tsc_sequence;
 +      u32 reserved1;
 +      volatile u64 tsc_scale;
 +      volatile s64 tsc_offset;
 +} __packed;
 +
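The TSC page is read with a sequence-retry loop so that an update by the hypervisor between reading the scale and the offset is detected. A hedged sketch of that protocol (not the kernel's implementation), assuming a 64-bit compiler with __int128 support and a caller-supplied raw TSC read:

static inline u64 hv_ref_time_from_tsc_page(const struct ms_hyperv_tsc_page *tp,
					    u64 (*read_hw_tsc)(void))
{
	u32 seq;
	u64 scale, tsc;
	s64 offset;

	do {
		seq = tp->tsc_sequence;
		if (!seq)
			return 0;	/* page not valid, caller must fall back */
		scale = tp->tsc_scale;
		offset = tp->tsc_offset;
		tsc = read_hw_tsc();
	} while (tp->tsc_sequence != seq);

	/* reference time = ((tsc * scale) >> 64) + offset, in 100ns units */
	return (u64)(((unsigned __int128)tsc * scale) >> 64) + (u64)offset;
}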
 +/*
 + * The guest OS needs to register the guest ID with the hypervisor.
 + * The guest ID is a 64 bit entity and the structure of this ID is
 + * specified in the Hyper-V specification:
 + *
 + * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
 + *
 + * While the current guideline does not specify how Linux guest IDs
 + * need to be generated, the plan is to publish guidelines for Linux
 + * and other guest operating systems that are currently hosted on
 + * Hyper-V. The implementation here conforms to these as-yet
 + * unpublished guidelines.
 + *
 + *
 + * Bit(s)
 + * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
 + * 62:56 - OS Type; Linux is 0x100
 + * 55:48 - Distro specific identification
 + * 47:16 - Linux kernel version number
 + * 15:0  - Distro specific identification
 + *
 + *
 + */
 +
 +#define HV_LINUX_VENDOR_ID              0x8100
 +
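A hedged sketch of packing a guest ID according to the bit layout documented above; the helper name and parameters are illustrative, and the real helper lives in architecture-specific code:

static inline u64 hv_generate_guest_id(u64 distro_id, u64 kernel_version,
				       u64 distro_info)
{
	u64 guest_id;

	guest_id  = (u64)HV_LINUX_VENDOR_ID << 48;	 /* bit 63 + OS type */
	guest_id |= (distro_id & 0xFF) << 48;		 /* bits 55:48 */
	guest_id |= (kernel_version & 0xFFFFFFFF) << 16; /* bits 47:16 */
	guest_id |= distro_info & 0xFFFF;		 /* bits 15:0 */

	return guest_id;
}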
 +/*
 + * Crash notification flags.
 + */
 +#define HV_CRASH_CTL_CRASH_NOTIFY_MSG         BIT_ULL(62)
 +#define HV_CRASH_CTL_CRASH_NOTIFY             BIT_ULL(63)
 +
 +/* Declare the various hypercall operations. */
 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE    0x0002
 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST     0x0003
 +#define HVCALL_NOTIFY_LONG_SPIN_WAIT          0x0008
 +#define HVCALL_SEND_IPI                               0x000b
 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX  0x0014
 +#define HVCALL_SEND_IPI_EX                    0x0015
 +#define HVCALL_GET_VP_REGISTERS                       0x0050
 +#define HVCALL_SET_VP_REGISTERS                       0x0051
 +#define HVCALL_POST_MESSAGE                   0x005c
 +#define HVCALL_SIGNAL_EVENT                   0x005d
++#define HVCALL_POST_DEBUG_DATA                        0x0069
++#define HVCALL_RETRIEVE_DEBUG_DATA            0x006a
++#define HVCALL_RESET_DEBUG_SESSION            0x006b
 +#define HVCALL_RETARGET_INTERRUPT             0x007e
 +#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
 +#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
 +
 +#define HV_FLUSH_ALL_PROCESSORS                       BIT(0)
 +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES   BIT(1)
 +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY     BIT(2)
 +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT    BIT(3)
 +
 +enum HV_GENERIC_SET_FORMAT {
 +      HV_GENERIC_SET_SPARSE_4K,
 +      HV_GENERIC_SET_ALL,
 +};
 +
 +#define HV_PARTITION_ID_SELF          ((u64)-1)
 +#define HV_VP_INDEX_SELF              ((u32)-2)
 +
 +#define HV_HYPERCALL_RESULT_MASK      GENMASK_ULL(15, 0)
 +#define HV_HYPERCALL_FAST_BIT         BIT(16)
 +#define HV_HYPERCALL_VARHEAD_OFFSET   17
 +#define HV_HYPERCALL_REP_COMP_OFFSET  32
 +#define HV_HYPERCALL_REP_COMP_1               BIT_ULL(32)
 +#define HV_HYPERCALL_REP_COMP_MASK    GENMASK_ULL(43, 32)
 +#define HV_HYPERCALL_REP_START_OFFSET 48
 +#define HV_HYPERCALL_REP_START_MASK   GENMASK_ULL(59, 48)
 +
 +/* hypercall status code */
 +#define HV_STATUS_SUCCESS                     0
 +#define HV_STATUS_INVALID_HYPERCALL_CODE      2
 +#define HV_STATUS_INVALID_HYPERCALL_INPUT     3
 +#define HV_STATUS_INVALID_ALIGNMENT           4
 +#define HV_STATUS_INVALID_PARAMETER           5
++#define HV_STATUS_OPERATION_DENIED            8
 +#define HV_STATUS_INSUFFICIENT_MEMORY         11
 +#define HV_STATUS_INVALID_PORT_ID             17
 +#define HV_STATUS_INVALID_CONNECTION_ID               18
 +#define HV_STATUS_INSUFFICIENT_BUFFERS                19
 +
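The offsets and masks above describe the 64-bit hypercall input ("control") value, and the low 16 bits of the result carry the completion status. A minimal sketch of composing a rep-hypercall control word and checking the returned status; the helper names are invented, and how the value actually reaches the hypervisor is architecture-specific:

static inline u64 hv_build_rep_control(u16 code, u16 rep_count, u16 rep_start)
{
	u64 control = code;	/* bits 15:0: HVCALL_* code */

	control |= ((u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET) &
		   HV_HYPERCALL_REP_COMP_MASK;
	control |= ((u64)rep_start << HV_HYPERCALL_REP_START_OFFSET) &
		   HV_HYPERCALL_REP_START_MASK;
	return control;
}

static inline u16 hv_call_status(u64 result)
{
	return result & HV_HYPERCALL_RESULT_MASK;	/* one of HV_STATUS_* */
}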
 +/*
 + * The Hyper-V TimeRefCount register and the TSC
 + * page provide a guest VM clock with a 100ns tick rate.
 + */
 +#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
 +
 +/* Define the number of synthetic interrupt sources. */
 +#define HV_SYNIC_SINT_COUNT           (16)
 +/* Define the expected SynIC version. */
 +#define HV_SYNIC_VERSION_1            (0x1)
 +/* Valid SynIC vectors are 16-255. */
 +#define HV_SYNIC_FIRST_VALID_VECTOR   (16)
 +
 +#define HV_SYNIC_CONTROL_ENABLE               (1ULL << 0)
 +#define HV_SYNIC_SIMP_ENABLE          (1ULL << 0)
 +#define HV_SYNIC_SIEFP_ENABLE         (1ULL << 0)
 +#define HV_SYNIC_SINT_MASKED          (1ULL << 16)
 +#define HV_SYNIC_SINT_AUTO_EOI                (1ULL << 17)
 +#define HV_SYNIC_SINT_VECTOR_MASK     (0xFF)
 +
 +#define HV_SYNIC_STIMER_COUNT         (4)
 +
 +/* Define synthetic interrupt controller message constants. */
 +#define HV_MESSAGE_SIZE                       (256)
 +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
 +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT        (30)
 +
 +/* Define synthetic interrupt controller message flags. */
 +union hv_message_flags {
 +      __u8 asu8;
 +      struct {
 +              __u8 msg_pending:1;
 +              __u8 reserved:7;
 +      } __packed;
 +};
 +
 +/* Define port identifier type. */
 +union hv_port_id {
 +      __u32 asu32;
 +      struct {
 +              __u32 id:24;
 +              __u32 reserved:8;
 +      } __packed u;
 +};
 +
 +/* Define synthetic interrupt controller message header. */
 +struct hv_message_header {
 +      __u32 message_type;
 +      __u8 payload_size;
 +      union hv_message_flags message_flags;
 +      __u8 reserved[2];
 +      union {
 +              __u64 sender;
 +              union hv_port_id port;
 +      };
 +} __packed;
 +
 +/* Define synthetic interrupt controller message format. */
 +struct hv_message {
 +      struct hv_message_header header;
 +      union {
 +              __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
 +      } u;
 +} __packed;
 +
 +/* Define the synthetic interrupt message page layout. */
 +struct hv_message_page {
 +      struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
 +} __packed;
 +
 +/* Define timer message payload structure. */
 +struct hv_timer_message_payload {
 +      __u32 timer_index;
 +      __u32 reserved;
 +      __u64 expiration_time;  /* When the timer expired */
 +      __u64 delivery_time;    /* When the message was delivered */
 +} __packed;
 +
 +
 +/* Define synthetic interrupt controller flag constants. */
 +#define HV_EVENT_FLAGS_COUNT          (256 * 8)
 +#define HV_EVENT_FLAGS_LONG_COUNT     (256 / sizeof(unsigned long))
 +
 +/*
 + * Synthetic timer configuration.
 + */
 +union hv_stimer_config {
 +      u64 as_uint64;
 +      struct {
 +              u64 enable:1;
 +              u64 periodic:1;
 +              u64 lazy:1;
 +              u64 auto_enable:1;
 +              u64 apic_vector:8;
 +              u64 direct_mode:1;
 +              u64 reserved_z0:3;
 +              u64 sintx:4;
 +              u64 reserved_z1:44;
 +      } __packed;
 +};
 +
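A direct-mode timer bypasses the SynIC message path and injects the configured vector directly. A hedged sketch of building such a configuration value from the bitfields above (illustrative helper, not part of the header):

static inline u64 hv_stimer_direct_config(u8 vector)
{
	union hv_stimer_config cfg = { .as_uint64 = 0 };

	cfg.enable = 1;
	cfg.direct_mode = 1;	/* deliver as an interrupt, not a SynIC message */
	cfg.apic_vector = vector;

	return cfg.as_uint64;
}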
 +
 +/* Define the synthetic interrupt controller event flags format. */
 +union hv_synic_event_flags {
 +      unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
 +};
 +
 +/* Define SynIC control register. */
 +union hv_synic_scontrol {
 +      u64 as_uint64;
 +      struct {
 +              u64 enable:1;
 +              u64 reserved:63;
 +      } __packed;
 +};
 +
 +/* Define synthetic interrupt source. */
 +union hv_synic_sint {
 +      u64 as_uint64;
 +      struct {
 +              u64 vector:8;
 +              u64 reserved1:8;
 +              u64 masked:1;
 +              u64 auto_eoi:1;
 +              u64 polling:1;
 +              u64 reserved2:45;
 +      } __packed;
 +};
 +
 +/* Define the format of the SIMP register */
 +union hv_synic_simp {
 +      u64 as_uint64;
 +      struct {
 +              u64 simp_enabled:1;
 +              u64 preserved:11;
 +              u64 base_simp_gpa:52;
 +      } __packed;
 +};
 +
 +/* Define the format of the SIEFP register */
 +union hv_synic_siefp {
 +      u64 as_uint64;
 +      struct {
 +              u64 siefp_enabled:1;
 +              u64 preserved:11;
 +              u64 base_siefp_gpa:52;
 +      } __packed;
 +};
 +
 +struct hv_vpset {
 +      u64 format;
 +      u64 valid_bank_mask;
 +      u64 bank_contents[];
 +} __packed;
 +
 +/* HvCallSendSyntheticClusterIpi hypercall */
 +struct hv_send_ipi {
 +      u32 vector;
 +      u32 reserved;
 +      u64 cpu_mask;
 +} __packed;
 +
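A hedged example of filling the fixed-size IPI argument for virtual processors 0 and 2; passing the structure to the hypervisor (fast hypercall registers vs. an input page) is architecture-specific and not shown:

static inline struct hv_send_ipi hv_example_send_ipi(void)
{
	struct hv_send_ipi ipi = {
		.vector   = 0xf3,			/* any valid SynIC vector (16-255) */
		.reserved = 0,
		.cpu_mask = BIT_ULL(0) | BIT_ULL(2),	/* VPs 0 and 2 */
	};

	return ipi;
}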
 +/* HvCallSendSyntheticClusterIpiEx hypercall */
 +struct hv_send_ipi_ex {
 +      u32 vector;
 +      u32 reserved;
 +      struct hv_vpset vp_set;
 +} __packed;
 +
 +/* HvFlushGuestPhysicalAddressSpace hypercalls */
 +struct hv_guest_mapping_flush {
 +      u64 address_space;
 +      u64 flags;
 +} __packed;
 +
 +/*
 + *  HV_MAX_FLUSH_PAGES is the maximum number of pages one entry can
 + *  describe ("additional_pages" + 1). It is limited by the bit width
 + *  of "additional_pages" in union hv_gpa_page_range.
 + */
 +#define HV_MAX_FLUSH_PAGES (2048)
 +
 +/* HvFlushGuestPhysicalAddressList hypercall */
 +union hv_gpa_page_range {
 +      u64 address_space;
 +      struct {
 +              u64 additional_pages:11;
 +              u64 largepage:1;
 +              u64 basepfn:52;
 +      } page;
 +};
 +
 +/*
 + * All input flush parameters must fit in a single page. The maximum
 + * flush count equals the number of union hv_gpa_page_range entries
 + * that can be populated into the input parameter page.
 + */
 +#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) /        \
 +                              sizeof(union hv_gpa_page_range))
 +
 +struct hv_guest_mapping_flush_list {
 +      u64 address_space;
 +      u64 flags;
 +      union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT];
 +};
 +
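With 4096-byte Hyper-V pages and the 16-byte address_space/flags header, HV_MAX_FLUSH_REP_COUNT works out to (4096 - 16) / 8 = 510 entries. A hedged sketch of populating the rep list with 4 KiB ranges (illustrative helper; not the in-tree implementation):

static inline int hv_fill_flush_list(struct hv_guest_mapping_flush_list *flush,
				     u64 start_gfn, u64 pages)
{
	int count = 0;
	u64 gfn = start_gfn;

	while (pages && count < HV_MAX_FLUSH_REP_COUNT) {
		/* each entry covers additional_pages + 1 pages, capped at 2048 */
		u64 extra = pages - 1;

		if (extra > HV_MAX_FLUSH_PAGES - 1)
			extra = HV_MAX_FLUSH_PAGES - 1;

		flush->gpa_list[count].page.largepage = 0;
		flush->gpa_list[count].page.basepfn = gfn;
		flush->gpa_list[count].page.additional_pages = extra;

		gfn += extra + 1;
		pages -= extra + 1;
		count++;
	}

	return count;	/* rep count for the hypercall control word */
}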
 +/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
 +struct hv_tlb_flush {
 +      u64 address_space;
 +      u64 flags;
 +      u64 processor_mask;
 +      u64 gva_list[];
 +} __packed;
 +
 +/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
 +struct hv_tlb_flush_ex {
 +      u64 address_space;
 +      u64 flags;
 +      struct hv_vpset hv_vp_set;
 +      u64 gva_list[];
 +} __packed;
 +
 +/* HvRetargetDeviceInterrupt hypercall */
 +union hv_msi_entry {
 +      u64 as_uint64;
 +      struct {
 +              u32 address;
 +              u32 data;
 +      } __packed;
 +};
 +
 +struct hv_interrupt_entry {
 +      u32 source;                     /* 1 for MSI(-X) */
 +      u32 reserved1;
 +      union hv_msi_entry msi_entry;
 +} __packed;
 +
 +/*
 + * flags for hv_device_interrupt_target.flags
 + */
 +#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST          1
 +#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET      2
 +
 +struct hv_device_interrupt_target {
 +      u32 vector;
 +      u32 flags;
 +      union {
 +              u64 vp_mask;
 +              struct hv_vpset vp_set;
 +      };
 +} __packed;
 +
 +struct hv_retarget_device_interrupt {
 +      u64 partition_id;               /* use "self" */
 +      u64 device_id;
 +      struct hv_interrupt_entry int_entry;
 +      u64 reserved2;
 +      struct hv_device_interrupt_target int_target;
 +} __packed __aligned(8);
 +
 +
 +/* HvGetVpRegisters hypercall input with variable-size reg name list */
 +struct hv_get_vp_registers_input {
 +      struct {
 +              u64 partitionid;
 +              u32 vpindex;
 +              u8  inputvtl;
 +              u8  padding[3];
 +      } header;
 +      struct input {
 +              u32 name0;
 +              u32 name1;
 +      } element[];
 +} __packed;
 +
 +
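A hedged sketch of preparing a HvGetVpRegisters request for a single register of the calling partition and VP; the caller is assumed to have allocated room for at least one element (for example a hypercall input page), and the register name encoding comes from the TLFS:

static inline void hv_prepare_get_one_vpreg(struct hv_get_vp_registers_input *in,
					    u32 reg_name)
{
	in->header.partitionid = HV_PARTITION_ID_SELF;
	in->header.vpindex     = HV_VP_INDEX_SELF;
	in->header.inputvtl    = 0;
	in->header.padding[0]  = 0;
	in->header.padding[1]  = 0;
	in->header.padding[2]  = 0;
	in->element[0].name0   = reg_name;	/* low 32 bits of the register name */
	in->element[0].name1   = 0;		/* high 32 bits */
}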
 +/* HvGetVpRegisters returns an array of these output elements */
 +struct hv_get_vp_registers_output {
 +      union {
 +              struct {
 +                      u32 a;
 +                      u32 b;
 +                      u32 c;
 +                      u32 d;
 +              } as32 __packed;
 +              struct {
 +                      u64 low;
 +                      u64 high;
 +              } as64 __packed;
 +      };
 +};
 +
 +/* HvSetVpRegisters hypercall with variable-size reg name/value list */
 +struct hv_set_vp_registers_input {
 +      struct {
 +              u64 partitionid;
 +              u32 vpindex;
 +              u8  inputvtl;
 +              u8  padding[3];
 +      } header;
 +      struct {
 +              u32 name;
 +              u32 padding1;
 +              u64 padding2;
 +              u64 valuelow;
 +              u64 valuehigh;
 +      } element[];
 +} __packed;
 +
 +#endif
Simple merge
Simple merge
diff --cc kernel/exit.c
Simple merge
Simple merge