Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6: (27 commits)
SLUB: Fix memory hotplug with !NUMA
slub: Move functions to reduce #ifdefs
slub: Enable sysfs support for !CONFIG_SLUB_DEBUG
SLUB: Optimize slab_free() debug check
slub: Move NUMA-related functions under CONFIG_NUMA
slub: Add lock release annotation
slub: Fix signedness warnings
slub: extract common code to remove objects from partial list without locking
SLUB: Pass active and inactive redzone flags instead of boolean to debug functions
slub: reduce differences between SMP and NUMA
Revert "Slub: UP bandaid"
percpu: clear memory allocated with the km allocator
percpu: use percpu allocator on UP too
percpu: reduce PCPU_MIN_UNIT_SIZE to 32k
vmalloc: pcpu_get/free_vm_areas() aren't needed on UP
SLUB: Fix merged slab cache names
Slub: UP bandaid
slub: fix SLUB_RESILIENCY_TEST for dynamic kmalloc caches
slub: Fix up missing kmalloc_cache -> kmem_cache_node case for memoryhotplug
slub: Add dummy functions for the !SLUB_DEBUG case
...
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-pyra b/Documentation/ABI/testing/sysfs-driver-hid-roccat-pyra
new file mode 100644
index 0000000..ad1125b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-pyra
@@ -0,0 +1,98 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/actual_cpi
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: It is possible to switch the cpi setting of the mouse with the
+ press of a button.
+ When read, this file returns the raw number of the actual cpi
+ setting reported by the mouse. This number has to be further
+ processed to receive the real dpi value.
+
+ VALUE DPI
+ 1 400
+ 2 800
+ 4 1600
+
+ This file is readonly.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/actual_profile
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns the number of the actual profile in
+ range 0-4.
+ This file is readonly.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/firmware_version
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns the raw integer version number of the
+ firmware reported by the mouse. Using the integer value eases
+ further usage in other programs. To receive the real version
+ number the decimal point has to be shifted 2 positions to the
+ left. E.g. a returned value of 138 means 1.38
+ This file is readonly.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/profile_settings
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_settings holds informations like resolution, sensitivity
+ and light effects.
+ When written, this file lets one write the respective profile
+ settings back to the mouse. The data has to be 13 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ This file is writeonly.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/profile[1-5]_settings
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_settings holds informations like resolution, sensitivity
+ and light effects.
+ When read, these files return the respective profile settings.
+ The returned data is 13 bytes in size.
+ This file is readonly.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/profile_buttons
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_buttons holds informations about button layout.
+ When written, this file lets one write the respective profile
+ buttons back to the mouse. The data has to be 19 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ This file is writeonly.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/profile[1-5]_buttons
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_buttons holds informations about button layout.
+ When read, these files return the respective profile buttons.
+ The returned data is 19 bytes in size.
+ This file is readonly.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/startup_profile
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The integer value of this attribute ranges from 0-4.
+ When read, this attribute returns the number of the profile
+ that's active when the mouse is powered on.
+ This file is readonly.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/settings
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns the settings stored in the mouse.
+ The size of the data is 3 bytes and holds information on the
+ startup_profile.
+ When written, this file lets write settings back to the mouse.
+ The data has to be 3 bytes long. The mouse will reject invalid
+ data.
diff --git a/Documentation/input/ntrig.txt b/Documentation/input/ntrig.txt
new file mode 100644
index 0000000..be1fd98
--- /dev/null
+++ b/Documentation/input/ntrig.txt
@@ -0,0 +1,126 @@
+N-Trig touchscreen Driver
+-------------------------
+ Copyright (c) 2008-2010 Rafi Rubin <rafi@seas.upenn.edu>
+ Copyright (c) 2009-2010 Stephane Chatty
+
+This driver provides support for N-Trig pen and multi-touch sensors. Single
+and multi-touch events are translated to the appropriate protocols for
+the hid and input systems. Pen events are sufficiently hid compliant and
+are left to the hid core. The driver also provides additional filtering
+and utility functions accessible with sysfs and module parameters.
+
+This driver has been reported to work properly with multiple N-Trig devices
+attached.
+
+
+Parameters
+----------
+
+Note: values set at load time are global and will apply to all applicable
+devices. Adjusting parameters with sysfs will override the load time values,
+but only for that one device.
+
+The following parameters are used to configure filters to reduce noise:
+
+activate_slack number of fingers to ignore before processing events
+
+activation_height size threshold to activate immediately
+activation_width
+
+min_height size threshold bellow which fingers are ignored
+min_width both to decide activation and during activity
+
+deactivate_slack the number of "no contact" frames to ignore before
+ propagating the end of activity events
+
+When the last finger is removed from the device, it sends a number of empty
+frames. By holding off on deactivation for a few frames we can tolerate false
+erroneous disconnects, where the sensor may mistakenly not detect a finger that
+is still present. Thus deactivate_slack addresses problems where a users might
+see breaks in lines during drawing, or drop an object during a long drag.
+
+
+Additional sysfs items
+----------------------
+
+These nodes just provide easy access to the ranges reported by the device.
+sensor_logical_height the range for positions reported during activity
+sensor_logical_width
+
+sensor_physical_height internal ranges not used for normal events but
+sensor_physical_width useful for tuning
+
+All N-Trig devices with product id of 1 report events in the ranges of
+X: 0-9600
+Y: 0-7200
+However not all of these devices have the same physical dimensions. Most
+seem to be 12" sensors (Dell Latitude XT and XT2 and the HP TX2), and
+at least one model (Dell Studio 17) has a 17" sensor. The ratio of physical
+to logical sizes is used to adjust the size based filter parameters.
+
+
+Filtering
+---------
+
+With the release of the early multi-touch firmwares it became increasingly
+obvious that these sensors were prone to erroneous events. Users reported
+seeing both inappropriately dropped contact and ghosts, contacts reported
+where no finger was actually touching the screen.
+
+Deactivation slack helps prevent dropped contact for single touch use, but does
+not address the problem of dropping one of more contacts while other contacts
+are still active. Drops in the multi-touch context require additional
+processing and should be handled in tandem with tacking.
+
+As observed ghost contacts are similar to actual use of the sensor, but they
+seem to have different profiles. Ghost activity typically shows up as small
+short lived touches. As such, I assume that the longer the continuous stream
+of events the more likely those events are from a real contact, and that the
+larger the size of each contact the more likely it is real. Balancing the
+goals of preventing ghosts and accepting real events quickly (to minimize
+user observable latency), the filter accumulates confidence for incoming
+events until it hits thresholds and begins propagating. In the interest in
+minimizing stored state as well as the cost of operations to make a decision,
+I've kept that decision simple.
+
+Time is measured in terms of the number of fingers reported, not frames since
+the probability of multiple simultaneous ghosts is expected to drop off
+dramatically with increasing numbers. Rather than accumulate weight as a
+function of size, I just use it as a binary threshold. A sufficiently large
+contact immediately overrides the waiting period and leads to activation.
+
+Setting the activation size thresholds to large values will result in deciding
+primarily on activation slack. If you see longer lived ghosts, turning up the
+activation slack while reducing the size thresholds may suffice to eliminate
+the ghosts while keeping the screen quite responsive to firm taps.
+
+Contacts continue to be filtered with min_height and min_width even after
+the initial activation filter is satisfied. The intent is to provide
+a mechanism for filtering out ghosts in the form of an extra finger while
+you actually are using the screen. In practice this sort of ghost has
+been far less problematic or relatively rare and I've left the defaults
+set to 0 for both parameters, effectively turning off that filter.
+
+I don't know what the optimal values are for these filters. If the defaults
+don't work for you, please play with the parameters. If you do find other
+values more comfortable, I would appreciate feedback.
+
+The calibration of these devices does drift over time. If ghosts or contact
+dropping worsen and interfere with the normal usage of your device, try
+recalibrating it.
+
+
+Calibration
+-----------
+
+The N-Trig windows tools provide calibration and testing routines. Also an
+unofficial unsupported set of user space tools including a calibrator is
+available at:
+http://code.launchpad.net/~rafi-seas/+junk/ntrig_calib
+
+
+Tracking
+--------
+
+As of yet, all tested N-Trig firmwares do not track fingers. When multiple
+contacts are active they seem to be sorted primarily by Y position.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4cd8b86..9533af7 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1131,9 +1131,13 @@
kvm.oos_shadow= [KVM] Disable out-of-sync shadow paging.
Default is 1 (enabled)
- kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
+ kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit
+ KVM MMU at runtime.
Default is 0 (off)
+ kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
+ Default is 1 (enabled)
+
kvm-amd.npt= [KVM,AMD] Disable nested paging (virtualized MMU)
for all guests.
Default is 1 (enabled) if in 64bit or 32bit-PAE mode
@@ -1698,6 +1702,8 @@
nojitter [IA64] Disables jitter checking for ITC timers.
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+
nolapic [X86-32,APIC] Do not enable or use the local APIC.
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 5f5b649..b336266 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -320,13 +320,13 @@
4.15 KVM_INTERRUPT
Capability: basic
-Architectures: x86
+Architectures: x86, ppc
Type: vcpu ioctl
Parameters: struct kvm_interrupt (in)
Returns: 0 on success, -1 on error
Queues a hardware interrupt vector to be injected. This is only
-useful if in-kernel local APIC is not used.
+useful if in-kernel local APIC or equivalent is not used.
/* for KVM_INTERRUPT */
struct kvm_interrupt {
@@ -334,8 +334,37 @@
__u32 irq;
};
+X86:
+
Note 'irq' is an interrupt vector, not an interrupt pin or line.
+PPC:
+
+Queues an external interrupt to be injected. This ioctl is overleaded
+with 3 different irq values:
+
+a) KVM_INTERRUPT_SET
+
+ This injects an edge type external interrupt into the guest once it's ready
+ to receive interrupts. When injected, the interrupt is done.
+
+b) KVM_INTERRUPT_UNSET
+
+ This unsets any pending interrupt.
+
+ Only available with KVM_CAP_PPC_UNSET_IRQ.
+
+c) KVM_INTERRUPT_SET_LEVEL
+
+ This injects a level type external interrupt into the guest context. The
+ interrupt stays pending until a specific ioctl with KVM_INTERRUPT_UNSET
+ is triggered.
+
+ Only available with KVM_CAP_PPC_IRQ_LEVEL.
+
+Note that any value for 'irq' other than the ones stated above is invalid
+and incurs unexpected behavior.
+
4.16 KVM_DEBUG_GUEST
Capability: basic
@@ -1013,8 +1042,9 @@
entries in the 'entries' array, which is then filled.
The entries returned are the host cpuid as returned by the cpuid instruction,
-with unknown or unsupported features masked out. The fields in each entry
-are defined as follows:
+with unknown or unsupported features masked out. Some features (for example,
+x2apic), may not be present in the host cpu, but are exposed by kvm if it can
+emulate them efficiently. The fields in each entry are defined as follows:
function: the eax value used to obtain the entry
index: the ecx value used to obtain the entry (for entries that are
@@ -1032,6 +1062,29 @@
eax, ebx, ecx, edx: the values returned by the cpuid instruction for
this function/index combination
+4.46 KVM_PPC_GET_PVINFO
+
+Capability: KVM_CAP_PPC_GET_PVINFO
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_ppc_pvinfo (out)
+Returns: 0 on success, !0 on error
+
+struct kvm_ppc_pvinfo {
+ __u32 flags;
+ __u32 hcall[4];
+ __u8 pad[108];
+};
+
+This ioctl fetches PV specific information that need to be passed to the guest
+using the device tree or other means from vm context.
+
+For now the only implemented piece of information distributed here is an array
+of 4 instructions that make up a hypercall.
+
+If any additional field gets added to this structure later on, a bit for that
+additional piece of information will be set in the flags bitmap.
+
5. The kvm_run structure
Application code obtains a pointer to the kvm_run structure by
diff --git a/Documentation/kvm/ppc-pv.txt b/Documentation/kvm/ppc-pv.txt
new file mode 100644
index 0000000..a7f2244
--- /dev/null
+++ b/Documentation/kvm/ppc-pv.txt
@@ -0,0 +1,196 @@
+The PPC KVM paravirtual interface
+=================================
+
+The basic execution principle by which KVM on PowerPC works is to run all kernel
+space code in PR=1 which is user space. This way we trap all privileged
+instructions and can emulate them accordingly.
+
+Unfortunately that is also the downfall. There are quite some privileged
+instructions that needlessly return us to the hypervisor even though they
+could be handled differently.
+
+This is what the PPC PV interface helps with. It takes privileged instructions
+and transforms them into unprivileged ones with some help from the hypervisor.
+This cuts down virtualization costs by about 50% on some of my benchmarks.
+
+The code for that interface can be found in arch/powerpc/kernel/kvm*
+
+Querying for existence
+======================
+
+To find out if we're running on KVM or not, we leverage the device tree. When
+Linux is running on KVM, a node /hypervisor exists. That node contains a
+compatible property with the value "linux,kvm".
+
+Once you determined you're running under a PV capable KVM, you can now use
+hypercalls as described below.
+
+KVM hypercalls
+==============
+
+Inside the device tree's /hypervisor node there's a property called
+'hypercall-instructions'. This property contains at most 4 opcodes that make
+up the hypercall. To call a hypercall, just call these instructions.
+
+The parameters are as follows:
+
+ Register IN OUT
+
+ r0 - volatile
+ r3 1st parameter Return code
+ r4 2nd parameter 1st output value
+ r5 3rd parameter 2nd output value
+ r6 4th parameter 3rd output value
+ r7 5th parameter 4th output value
+ r8 6th parameter 5th output value
+ r9 7th parameter 6th output value
+ r10 8th parameter 7th output value
+ r11 hypercall number 8th output value
+ r12 - volatile
+
+Hypercall definitions are shared in generic code, so the same hypercall numbers
+apply for x86 and powerpc alike with the exception that each KVM hypercall
+also needs to be ORed with the KVM vendor code which is (42 << 16).
+
+Return codes can be as follows:
+
+ Code Meaning
+
+ 0 Success
+ 12 Hypercall not implemented
+ <0 Error
+
+The magic page
+==============
+
+To enable communication between the hypervisor and guest there is a new shared
+page that contains parts of supervisor visible register state. The guest can
+map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE.
+
+With this hypercall issued the guest always gets the magic page mapped at the
+desired location in effective and physical address space. For now, we always
+map the page to -4096. This way we can access it using absolute load and store
+functions. The following instruction reads the first field of the magic page:
+
+ ld rX, -4096(0)
+
+The interface is designed to be extensible should there be need later to add
+additional registers to the magic page. If you add fields to the magic page,
+also define a new hypercall feature to indicate that the host can give you more
+registers. Only if the host supports the additional features, make use of them.
+
+The magic page has the following layout as described in
+arch/powerpc/include/asm/kvm_para.h:
+
+struct kvm_vcpu_arch_shared {
+ __u64 scratch1;
+ __u64 scratch2;
+ __u64 scratch3;
+ __u64 critical; /* Guest may not get interrupts if == r1 */
+ __u64 sprg0;
+ __u64 sprg1;
+ __u64 sprg2;
+ __u64 sprg3;
+ __u64 srr0;
+ __u64 srr1;
+ __u64 dar;
+ __u64 msr;
+ __u32 dsisr;
+ __u32 int_pending; /* Tells the guest if we have an interrupt */
+};
+
+Additions to the page must only occur at the end. Struct fields are always 32
+or 64 bit aligned, depending on them being 32 or 64 bit wide respectively.
+
+Magic page features
+===================
+
+When mapping the magic page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE,
+a second return value is passed to the guest. This second return value contains
+a bitmap of available features inside the magic page.
+
+The following enhancements to the magic page are currently available:
+
+ KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page
+
+For enhanced features in the magic page, please check for the existence of the
+feature before using them!
+
+MSR bits
+========
+
+The MSR contains bits that require hypervisor intervention and bits that do
+not require direct hypervisor intervention because they only get interpreted
+when entering the guest or don't have any impact on the hypervisor's behavior.
+
+The following bits are safe to be set inside the guest:
+
+ MSR_EE
+ MSR_RI
+ MSR_CR
+ MSR_ME
+
+If any other bit changes in the MSR, please still use mtmsr(d).
+
+Patched instructions
+====================
+
+The "ld" and "std" instructions are transormed to "lwz" and "stw" instructions
+respectively on 32 bit systems with an added offset of 4 to accomodate for big
+endianness.
+
+The following is a list of mapping the Linux kernel performs when running as
+guest. Implementing any of those mappings is optional, as the instruction traps
+also act on the shared page. So calling privileged instructions still works as
+before.
+
+From To
+==== ==
+
+mfmsr rX ld rX, magic_page->msr
+mfsprg rX, 0 ld rX, magic_page->sprg0
+mfsprg rX, 1 ld rX, magic_page->sprg1
+mfsprg rX, 2 ld rX, magic_page->sprg2
+mfsprg rX, 3 ld rX, magic_page->sprg3
+mfsrr0 rX ld rX, magic_page->srr0
+mfsrr1 rX ld rX, magic_page->srr1
+mfdar rX ld rX, magic_page->dar
+mfdsisr rX lwz rX, magic_page->dsisr
+
+mtmsr rX std rX, magic_page->msr
+mtsprg 0, rX std rX, magic_page->sprg0
+mtsprg 1, rX std rX, magic_page->sprg1
+mtsprg 2, rX std rX, magic_page->sprg2
+mtsprg 3, rX std rX, magic_page->sprg3
+mtsrr0 rX std rX, magic_page->srr0
+mtsrr1 rX std rX, magic_page->srr1
+mtdar rX std rX, magic_page->dar
+mtdsisr rX stw rX, magic_page->dsisr
+
+tlbsync nop
+
+mtmsrd rX, 0 b <special mtmsr section>
+mtmsr rX b <special mtmsr section>
+
+mtmsrd rX, 1 b <special mtmsrd section>
+
+[Book3S only]
+mtsrin rX, rY b <special mtsrin section>
+
+[BookE only]
+wrteei [0|1] b <special wrteei section>
+
+
+Some instructions require more logic to determine what's going on than a load
+or store instruction can deliver. To enable patching of those, we keep some
+RAM around where we can live translate instructions to. What happens is the
+following:
+
+ 1) copy emulation code to memory
+ 2) patch that code to fit the emulated instruction
+ 3) patch that code to return to the original pc + 4
+ 4) patch the original instruction to branch to the new code
+
+That way we can inject an arbitrary amount of code as replacement for a single
+instruction. This allows us to check for pending interrupts when setting EE=1
+for example.
diff --git a/Documentation/kvm/timekeeping.txt b/Documentation/kvm/timekeeping.txt
new file mode 100644
index 0000000..0c5033a
--- /dev/null
+++ b/Documentation/kvm/timekeeping.txt
@@ -0,0 +1,612 @@
+
+ Timekeeping Virtualization for X86-Based Architectures
+
+ Zachary Amsden <zamsden@redhat.com>
+ Copyright (c) 2010, Red Hat. All rights reserved.
+
+1) Overview
+2) Timing Devices
+3) TSC Hardware
+4) Virtualization Problems
+
+=========================================================================
+
+1) Overview
+
+One of the most complicated parts of the X86 platform, and specifically,
+the virtualization of this platform is the plethora of timing devices available
+and the complexity of emulating those devices. In addition, virtualization of
+time introduces a new set of challenges because it introduces a multiplexed
+division of time beyond the control of the guest CPU.
+
+First, we will describe the various timekeeping hardware available, then
+present some of the problems which arise and solutions available, giving
+specific recommendations for certain classes of KVM guests.
+
+The purpose of this document is to collect data and information relevant to
+timekeeping which may be difficult to find elsewhere, specifically,
+information relevant to KVM and hardware-based virtualization.
+
+=========================================================================
+
+2) Timing Devices
+
+First we discuss the basic hardware devices available. TSC and the related
+KVM clock are special enough to warrant a full exposition and are described in
+the following section.
+
+2.1) i8254 - PIT
+
+One of the first timer devices available is the programmable interrupt timer,
+or PIT. The PIT has a fixed frequency 1.193182 MHz base clock and three
+channels which can be programmed to deliver periodic or one-shot interrupts.
+These three channels can be configured in different modes and have individual
+counters. Channel 1 and 2 were not available for general use in the original
+IBM PC, and historically were connected to control RAM refresh and the PC
+speaker. Now the PIT is typically integrated as part of an emulated chipset
+and a separate physical PIT is not used.
+
+The PIT uses I/O ports 0x40 - 0x43. Access to the 16-bit counters is done
+using single or multiple byte access to the I/O ports. There are 6 modes
+available, but not all modes are available to all timers, as only timer 2
+has a connected gate input, required for modes 1 and 5. The gate line is
+controlled by port 61h, bit 0, as illustrated in the following diagram.
+
+ -------------- ----------------
+| | | |
+| 1.1932 MHz |---------->| CLOCK OUT | ---------> IRQ 0
+| Clock | | | |
+ -------------- | +->| GATE TIMER 0 |
+ | ----------------
+ |
+ | ----------------
+ | | |
+ |------>| CLOCK OUT | ---------> 66.3 KHZ DRAM
+ | | | (aka /dev/null)
+ | +->| GATE TIMER 1 |
+ | ----------------
+ |
+ | ----------------
+ | | |
+ |------>| CLOCK OUT | ---------> Port 61h, bit 5
+ | | |
+Port 61h, bit 0 ---------->| GATE TIMER 2 | \_.---- ____
+ ---------------- _| )--|LPF|---Speaker
+ / *---- \___/
+Port 61h, bit 1 -----------------------------------/
+
+The timer modes are now described.
+
+Mode 0: Single Timeout. This is a one-shot software timeout that counts down
+ when the gate is high (always true for timers 0 and 1). When the count
+ reaches zero, the output goes high.
+
+Mode 1: Triggered One-shot. The output is intially set high. When the gate
+ line is set high, a countdown is initiated (which does not stop if the gate is
+ lowered), during which the output is set low. When the count reaches zero,
+ the output goes high.
+
+Mode 2: Rate Generator. The output is initially set high. When the countdown
+ reaches 1, the output goes low for one count and then returns high. The value
+ is reloaded and the countdown automatically resumes. If the gate line goes
+ low, the count is halted. If the output is low when the gate is lowered, the
+ output automatically goes high (this only affects timer 2).
+
+Mode 3: Square Wave. This generates a high / low square wave. The count
+ determines the length of the pulse, which alternates between high and low
+ when zero is reached. The count only proceeds when gate is high and is
+ automatically reloaded on reaching zero. The count is decremented twice at
+ each clock to generate a full high / low cycle at the full periodic rate.
+ If the count is even, the clock remains high for N/2 counts and low for N/2
+ counts; if the clock is odd, the clock is high for (N+1)/2 counts and low
+ for (N-1)/2 counts. Only even values are latched by the counter, so odd
+ values are not observed when reading. This is the intended mode for timer 2,
+ which generates sine-like tones by low-pass filtering the square wave output.
+
+Mode 4: Software Strobe. After programming this mode and loading the counter,
+ the output remains high until the counter reaches zero. Then the output
+ goes low for 1 clock cycle and returns high. The counter is not reloaded.
+ Counting only occurs when gate is high.
+
+Mode 5: Hardware Strobe. After programming and loading the counter, the
+ output remains high. When the gate is raised, a countdown is initiated
+ (which does not stop if the gate is lowered). When the counter reaches zero,
+ the output goes low for 1 clock cycle and then returns high. The counter is
+ not reloaded.
+
+In addition to normal binary counting, the PIT supports BCD counting. The
+command port, 0x43 is used to set the counter and mode for each of the three
+timers.
+
+PIT commands, issued to port 0x43, using the following bit encoding:
+
+Bit 7-4: Command (See table below)
+Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
+Bit 0 : Binary (0) / BCD (1)
+
+Command table:
+
+0000 - Latch Timer 0 count for port 0x40
+ sample and hold the count to be read in port 0x40;
+ additional commands ignored until counter is read;
+ mode bits ignored.
+
+0001 - Set Timer 0 LSB mode for port 0x40
+ set timer to read LSB only and force MSB to zero;
+ mode bits set timer mode
+
+0010 - Set Timer 0 MSB mode for port 0x40
+ set timer to read MSB only and force LSB to zero;
+ mode bits set timer mode
+
+0011 - Set Timer 0 16-bit mode for port 0x40
+ set timer to read / write LSB first, then MSB;
+ mode bits set timer mode
+
+0100 - Latch Timer 1 count for port 0x41 - as described above
+0101 - Set Timer 1 LSB mode for port 0x41 - as described above
+0110 - Set Timer 1 MSB mode for port 0x41 - as described above
+0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
+
+1000 - Latch Timer 2 count for port 0x42 - as described above
+1001 - Set Timer 2 LSB mode for port 0x42 - as described above
+1010 - Set Timer 2 MSB mode for port 0x42 - as described above
+1011 - Set Timer 2 16-bit mode for port 0x42 as described above
+
+1101 - General counter latch
+ Latch combination of counters into corresponding ports
+ Bit 3 = Counter 2
+ Bit 2 = Counter 1
+ Bit 1 = Counter 0
+ Bit 0 = Unused
+
+1110 - Latch timer status
+ Latch combination of counter mode into corresponding ports
+ Bit 3 = Counter 2
+ Bit 2 = Counter 1
+ Bit 1 = Counter 0
+
+ The output of ports 0x40-0x42 following this command will be:
+
+ Bit 7 = Output pin
+ Bit 6 = Count loaded (0 if timer has expired)
+ Bit 5-4 = Read / Write mode
+ 01 = MSB only
+ 10 = LSB only
+ 11 = LSB / MSB (16-bit)
+ Bit 3-1 = Mode
+ Bit 0 = Binary (0) / BCD mode (1)
+
+2.2) RTC
+
+The second device which was available in the original PC was the MC146818 real
+time clock. The original device is now obsolete, and usually emulated by the
+system chipset, sometimes by an HPET and some frankenstein IRQ routing.
+
+The RTC is accessed through CMOS variables, which uses an index register to
+control which bytes are read. Since there is only one index register, read
+of the CMOS and read of the RTC require lock protection (in addition, it is
+dangerous to allow userspace utilities such as hwclock to have direct RTC
+access, as they could corrupt kernel reads and writes of CMOS memory).
+
+The RTC generates an interrupt which is usually routed to IRQ 8. The interrupt
+can function as a periodic timer, an additional once a day alarm, and can issue
+interrupts after an update of the CMOS registers by the MC146818 is complete.
+The type of interrupt is signalled in the RTC status registers.
+
+The RTC will update the current time fields by battery power even while the
+system is off. The current time fields should not be read while an update is
+in progress, as indicated in the status register.
+
+The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
+programmed to a 32kHz divider if the RTC is to count seconds.
+
+This is the RAM map originally used for the RTC/CMOS:
+
+Location Size Description
+------------------------------------------
+00h byte Current second (BCD)
+01h byte Seconds alarm (BCD)
+02h byte Current minute (BCD)
+03h byte Minutes alarm (BCD)
+04h byte Current hour (BCD)
+05h byte Hours alarm (BCD)
+06h byte Current day of week (BCD)
+07h byte Current day of month (BCD)
+08h byte Current month (BCD)
+09h byte Current year (BCD)
+0Ah byte Register A
+ bit 7 = Update in progress
+ bit 6-4 = Divider for clock
+ 000 = 4.194 MHz
+ 001 = 1.049 MHz
+ 010 = 32 kHz
+ 10X = test modes
+ 110 = reset / disable
+ 111 = reset / disable
+ bit 3-0 = Rate selection for periodic interrupt
+ 000 = periodic timer disabled
+ 001 = 3.90625 uS
+ 010 = 7.8125 uS
+ 011 = .122070 mS
+ 100 = .244141 mS
+ ...
+ 1101 = 125 mS
+ 1110 = 250 mS
+ 1111 = 500 mS
+0Bh byte Register B
+ bit 7 = Run (0) / Halt (1)
+ bit 6 = Periodic interrupt enable
+ bit 5 = Alarm interrupt enable
+ bit 4 = Update-ended interrupt enable
+ bit 3 = Square wave interrupt enable
+ bit 2 = BCD calendar (0) / Binary (1)
+ bit 1 = 12-hour mode (0) / 24-hour mode (1)
+ bit 0 = 0 (DST off) / 1 (DST enabled)
+OCh byte Register C (read only)
+ bit 7 = interrupt request flag (IRQF)
+ bit 6 = periodic interrupt flag (PF)
+ bit 5 = alarm interrupt flag (AF)
+ bit 4 = update interrupt flag (UF)
+ bit 3-0 = reserved
+ODh byte Register D (read only)
+ bit 7 = RTC has power
+ bit 6-0 = reserved
+32h byte Current century BCD (*)
+ (*) location vendor specific and now determined from ACPI global tables
+
+2.3) APIC
+
+On Pentium and later processors, an on-board timer is available to each CPU
+as part of the Advanced Programmable Interrupt Controller. The APIC is
+accessed through memory-mapped registers and provides interrupt service to each
+CPU, used for IPIs and local timer interrupts.
+
+Although in theory the APIC is a safe and stable source for local interrupts,
+in practice, many bugs and glitches have occurred due to the special nature of
+the APIC CPU-local memory-mapped hardware. Beware that CPU errata may affect
+the use of the APIC and that workarounds may be required. In addition, some of
+these workarounds pose unique constraints for virtualization - requiring either
+extra overhead incurred from extra reads of memory-mapped I/O or additional
+functionality that may be more computationally expensive to implement.
+
+Since the APIC is documented quite well in the Intel and AMD manuals, we will
+avoid repetition of the detail here. It should be pointed out that the APIC
+timer is programmed through the LVT (local vector timer) register, is capable
+of one-shot or periodic operation, and is based on the bus clock divided down
+by the programmable divider register.
+
+2.4) HPET
+
+HPET is quite complex, and was originally intended to replace the PIT / RTC
+support of the X86 PC. It remains to be seen whether that will be the case, as
+the de facto standard of PC hardware is to emulate these older devices. Some
+systems designated as legacy free may support only the HPET as a hardware timer
+device.
+
+The HPET spec is rather loose and vague, requiring at least 3 hardware timers,
+but allowing implementation freedom to support many more. It also imposes no
+fixed rate on the timer frequency, but does impose some extremal values on
+frequency, error and slew.
+
+In general, the HPET is recommended as a high precision (compared to PIT /RTC)
+time source which is independent of local variation (as there is only one HPET
+in any given system). The HPET is also memory-mapped, and its presence is
+indicated through ACPI tables by the BIOS.
+
+Detailed specification of the HPET is beyond the current scope of this
+document, as it is also very well documented elsewhere.
+
+2.5) Offboard Timers
+
+Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
+timing chips built into the cards which may have registers which are accessible
+to kernel or user drivers. To the author's knowledge, using these to generate
+a clocksource for a Linux or other kernel has not yet been attempted and is in
+general frowned upon as not playing by the agreed rules of the game. Such a
+timer device would require additional support to be virtualized properly and is
+not considered important at this time as no known operating system does this.
+
+=========================================================================
+
+3) TSC Hardware
+
+The TSC or time stamp counter is relatively simple in theory; it counts
+instruction cycles issued by the processor, which can be used as a measure of
+time. In practice, due to a number of problems, it is the most complicated
+timekeeping device to use.
+
+The TSC is represented internally as a 64-bit MSR which can be read with the
+RDMSR, RDTSC, or RDTSCP (when available) instructions. In the past, hardware
+limitations made it possible to write the TSC, but generally on old hardware it
+was only possible to write the low 32-bits of the 64-bit counter, and the upper
+32-bits of the counter were cleared. Now, however, on Intel processors family
+0Fh, for models 3, 4 and 6, and family 06h, models e and f, this restriction
+has been lifted and all 64-bits are writable. On AMD systems, the ability to
+write the TSC MSR is not an architectural guarantee.
+
+The TSC is accessible from CPL-0 and conditionally, for CPL > 0 software by
+means of the CR4.TSD bit, which when enabled, disables CPL > 0 TSC access.
+
+Some vendors have implemented an additional instruction, RDTSCP, which returns
+atomically not just the TSC, but an indicator which corresponds to the
+processor number. This can be used to index into an array of TSC variables to
+determine offset information in SMP systems where TSCs are not synchronized.
+The presence of this instruction must be determined by consulting CPUID feature
+bits.
+
+Both VMX and SVM provide extension fields in the virtualization hardware which
+allows the guest visible TSC to be offset by a constant. Newer implementations
+promise to allow the TSC to additionally be scaled, but this hardware is not
+yet widely available.
+
+3.1) TSC synchronization
+
+The TSC is a CPU-local clock in most implementations. This means, on SMP
+platforms, the TSCs of different CPUs may start at different times depending
+on when the CPUs are powered on. Generally, CPUs on the same die will share
+the same clock, however, this is not always the case.
+
+The BIOS may attempt to resynchronize the TSCs during the poweron process and
+the operating system or other system software may attempt to do this as well.
+Several hardware limitations make the problem worse - if it is not possible to
+write the full 64-bits of the TSC, it may be impossible to match the TSC in
+newly arriving CPUs to that of the rest of the system, resulting in
+unsynchronized TSCs. This may be done by BIOS or system software, but in
+practice, getting a perfectly synchronized TSC will not be possible unless all
+values are read from the same clock, which generally only is possible on single
+socket systems or those with special hardware support.
+
+3.2) TSC and CPU hotplug
+
+As touched on already, CPUs which arrive later than the boot time of the system
+may not have a TSC value that is synchronized with the rest of the system.
+Either system software, BIOS, or SMM code may actually try to establish the TSC
+to a value matching the rest of the system, but a perfect match is usually not
+a guarantee. This can have the effect of bringing a system from a state where
+TSC is synchronized back to a state where TSC synchronization flaws, however
+small, may be exposed to the OS and any virtualization environment.
+
+3.3) TSC and multi-socket / NUMA
+
+Multi-socket systems, especially large multi-socket systems are likely to have
+individual clocksources rather than a single, universally distributed clock.
+Since these clocks are driven by different crystals, they will not have
+perfectly matched frequency, and temperature and electrical variations will
+cause the CPU clocks, and thus the TSCs to drift over time. Depending on the
+exact clock and bus design, the drift may or may not be fixed in absolute
+error, and may accumulate over time.
+
+In addition, very large systems may deliberately slew the clocks of individual
+cores. This technique, known as spread-spectrum clocking, reduces EMI at the
+clock frequency and harmonics of it, which may be required to pass FCC
+standards for telecommunications and computer equipment.
+
+It is recommended not to trust the TSCs to remain synchronized on NUMA or
+multiple socket systems for these reasons.
+
+3.4) TSC and C-states
+
+C-states, or idling states of the processor, especially C1E and deeper sleep
+states may be problematic for TSC as well. The TSC may stop advancing in such
+a state, resulting in a TSC which is behind that of other CPUs when execution
+is resumed. Such CPUs must be detected and flagged by the operating system
+based on CPU and chipset identifications.
+
+The TSC in such a case may be corrected by catching it up to a known external
+clocksource.
+
+3.5) TSC frequency change / P-states
+
+To make things slightly more interesting, some CPUs may change frequency. They
+may or may not run the TSC at the same rate, and because the frequency change
+may be staggered or slewed, at some points in time, the TSC rate may not be
+known other than falling within a range of values. In this case, the TSC will
+not be a stable time source, and must be calibrated against a known, stable,
+external clock to be a usable source of time.
+
+Whether the TSC runs at a constant rate or scales with the P-state is model
+dependent and must be determined by inspecting CPUID, chipset or vendor
+specific MSR fields.
+
+In addition, some vendors have known bugs where the P-state is actually
+compensated for properly during normal operation, but when the processor is
+inactive, the P-state may be raised temporarily to service cache misses from
+other processors. In such cases, the TSC on halted CPUs could advance faster
+than that of non-halted processors. AMD Turion processors are known to have
+this problem.
+
+3.6) TSC and STPCLK / T-states
+
+External signals given to the processor may also have the effect of stopping
+the TSC. This is typically done for thermal emergency power control to prevent
+an overheating condition, and typically, there is no way to detect that this
+condition has happened.
+
+3.7) TSC virtualization - VMX
+
+VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
+instructions, which is enough for full virtualization of TSC in any manner. In
+addition, VMX allows passing through the host TSC plus an additional TSC_OFFSET
+field specified in the VMCS. Special instructions must be used to read and
+write the VMCS field.
+
+3.8) TSC virtualization - SVM
+
+SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
+instructions, which is enough for full virtualization of TSC in any manner. In
+addition, SVM allows passing through the host TSC plus an additional offset
+field specified in the SVM control block.
+
+3.9) TSC feature bits in Linux
+
+In summary, there is no way to guarantee the TSC remains in perfect
+synchronization unless it is explicitly guaranteed by the architecture. Even
+if so, the TSCs in multi-sockets or NUMA systems may still run independently
+despite being locally consistent.
+
+The following feature bits are used by Linux to signal various TSC attributes,
+but they can only be taken to be meaningful for UP or single node systems.
+
+X86_FEATURE_TSC : The TSC is available in hardware
+X86_FEATURE_RDTSCP : The RDTSCP instruction is available
+X86_FEATURE_CONSTANT_TSC : The TSC rate is unchanged with P-states
+X86_FEATURE_NONSTOP_TSC : The TSC does not stop in C-states
+X86_FEATURE_TSC_RELIABLE : TSC sync checks are skipped (VMware)
+
+4) Virtualization Problems
+
+Timekeeping is especially problematic for virtualization because a number of
+challenges arise. The most obvious problem is that time is now shared between
+the host and, potentially, a number of virtual machines. Thus the virtual
+operating system does not run with 100% usage of the CPU, despite the fact that
+it may very well make that assumption. It may expect it to remain true to very
+exacting bounds when interrupt sources are disabled, but in reality only its
+virtual interrupt sources are disabled, and the machine may still be preempted
+at any time. This causes problems as the passage of real time, the injection
+of machine interrupts and the associated clock sources are no longer completely
+synchronized with real time.
+
+This same problem can occur on native harware to a degree, as SMM mode may
+steal cycles from the naturally on X86 systems when SMM mode is used by the
+BIOS, but not in such an extreme fashion. However, the fact that SMM mode may
+cause similar problems to virtualization makes it a good justification for
+solving many of these problems on bare metal.
+
+4.1) Interrupt clocking
+
+One of the most immediate problems that occurs with legacy operating systems
+is that the system timekeeping routines are often designed to keep track of
+time by counting periodic interrupts. These interrupts may come from the PIT
+or the RTC, but the problem is the same: the host virtualization engine may not
+be able to deliver the proper number of interrupts per second, and so guest
+time may fall behind. This is especially problematic if a high interrupt rate
+is selected, such as 1000 HZ, which is unfortunately the default for many Linux
+guests.
+
+There are three approaches to solving this problem; first, it may be possible
+to simply ignore it. Guests which have a separate time source for tracking
+'wall clock' or 'real time' may not need any adjustment of their interrupts to
+maintain proper time. If this is not sufficient, it may be necessary to inject
+additional interrupts into the guest in order to increase the effective
+interrupt rate. This approach leads to complications in extreme conditions,
+where host load or guest lag is too much to compensate for, and thus another
+solution to the problem has risen: the guest may need to become aware of lost
+ticks and compensate for them internally. Although promising in theory, the
+implementation of this policy in Linux has been extremely error prone, and a
+number of buggy variants of lost tick compensation are distributed across
+commonly used Linux systems.
+
+Windows uses periodic RTC clocking as a means of keeping time internally, and
+thus requires interrupt slewing to keep proper time. It does use a low enough
+rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in
+practice.
+
+4.2) TSC sampling and serialization
+
+As the highest precision time source available, the cycle counter of the CPU
+has aroused much interest from developers. As explained above, this timer has
+many problems unique to its nature as a local, potentially unstable and
+potentially unsynchronized source. One issue which is not unique to the TSC,
+but is highlighted because of its very precise nature is sampling delay. By
+definition, the counter, once read is already old. However, it is also
+possible for the counter to be read ahead of the actual use of the result.
+This is a consequence of the superscalar execution of the instruction stream,
+which may execute instructions out of order. Such execution is called
+non-serialized. Forcing serialized execution is necessary for precise
+measurement with the TSC, and requires a serializing instruction, such as CPUID
+or an MSR read.
+
+Since CPUID may actually be virtualized by a trap and emulate mechanism, this
+serialization can pose a performance issue for hardware virtualization. An
+accurate time stamp counter reading may therefore not always be available, and
+it may be necessary for an implementation to guard against "backwards" reads of
+the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
+system.
+
+4.3) Timespec aliasing
+
+Additionally, this lack of serialization from the TSC poses another challenge
+when using results of the TSC when measured against another time source. As
+the TSC is much higher precision, many possible values of the TSC may be read
+while another clock is still expressing the same value.
+
+That is, you may read (T,T+10) while external clock C maintains the same value.
+Due to non-serialized reads, you may actually end up with a range which
+fluctuates - from (T-1.. T+10). Thus, any time calculated from a TSC, but
+calibrated against an external value may have a range of valid values.
+Re-calibrating this computation may actually cause time, as computed after the
+calibration, to go backwards, compared with time computed before the
+calibration.
+
+This problem is particularly pronounced with an internal time source in Linux,
+the kernel time, which is expressed in the theoretically high resolution
+timespec - but which advances in much larger granularity intervals, sometimes
+at the rate of jiffies, and possibly in catchup modes, at a much larger step.
+
+This aliasing requires care in the computation and recalibration of kvmclock
+and any other values derived from TSC computation (such as TSC virtualization
+itself).
+
+4.4) Migration
+
+Migration of a virtual machine raises problems for timekeeping in two ways.
+First, the migration itself may take time, during which interrupts cannot be
+delivered, and after which, the guest time may need to be caught up. NTP may
+be able to help to some degree here, as the clock correction required is
+typically small enough to fall in the NTP-correctable window.
+
+An additional concern is that timers based off the TSC (or HPET, if the raw bus
+clock is exposed) may now be running at different rates, requiring compensation
+in some way in the hypervisor by virtualizing these timers. In addition,
+migrating to a faster machine may preclude the use of a passthrough TSC, as a
+faster clock cannot be made visible to a guest without the potential of time
+advancing faster than usual. A slower clock is less of a problem, as it can
+always be caught up to the original rate. KVM clock avoids these problems by
+simply storing multipliers and offsets against the TSC for the guest to convert
+back into nanosecond resolution values.
+
+4.5) Scheduling
+
+Since scheduling may be based on precise timing and firing of interrupts, the
+scheduling algorithms of an operating system may be adversely affected by
+virtualization. In theory, the effect is random and should be universally
+distributed, but in contrived as well as real scenarios (guest device access,
+causes of virtualization exits, possible context switch), this may not always
+be the case. The effect of this has not been well studied.
+
+In an attempt to work around this, several implementations have provided a
+paravirtualized scheduler clock, which reveals the true amount of CPU time for
+which a virtual machine has been running.
+
+4.6) Watchdogs
+
+Watchdog timers, such as the lock detector in Linux may fire accidentally when
+running under hardware virtualization due to timer interrupts being delayed or
+misinterpretation of the passage of real time. Usually, these warnings are
+spurious and can be ignored, but in some circumstances it may be necessary to
+disable such detection.
+
+4.7) Delays and precision timing
+
+Precise timing and delays may not be possible in a virtualized system. This
+can happen if the system is controlling physical hardware, or issues delays to
+compensate for slower I/O to and from devices. The first issue is not solvable
+in general for a virtualized system; hardware control software can't be
+adequately virtualized without a full real-time operating system, which would
+require an RT aware virtualization platform.
+
+The second issue may cause performance problems, but this is unlikely to be a
+significant issue. In many cases these delays may be eliminated through
+configuration or paravirtualization.
+
+4.8) Covert channels and leaks
+
+In addition to the above problems, time information will inevitably leak to the
+guest about the host in anything but a perfect implementation of virtualized
+time. This may allow the guest to infer the presence of a hypervisor (as in a
+red-pill type detection), and it may allow information to leak between guests
+by using CPU utilization itself as a signalling channel. Preventing such
+problems would require completely isolated virtual time which may not track
+real time any longer. This may be useful in certain security or QA contexts,
+but in general isn't recommended for real-world deployment scenarios.
diff --git a/MAINTAINERS b/MAINTAINERS
index 494e1a0..0e265fc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4453,6 +4453,15 @@
S: Maintained
F: drivers/i2c/busses/i2c-pasemi.c
+PADATA PARALLEL EXECUTION MECHANISM
+M: Steffen Klassert <steffen.klassert@secunet.com>
+L: linux-kernel@vger.kernel.org
+L: linux-crypto@vger.kernel.org
+S: Maintained
+F: kernel/padata.c
+F: include/linux/padata.h
+F: Documentation/padata.txt
+
PANASONIC LAPTOP ACPI EXTRAS DRIVER
M: Harald Welte <laforge@gnumonks.org>
L: platform-driver-x86@vger.kernel.org
@@ -4532,6 +4541,12 @@
F: drivers/leds/leds-pca9532.c
F: include/linux/leds-pca9532.h
+PCA9541 I2C BUS MASTER SELECTOR DRIVER
+M: Guenter Roeck <guenter.roeck@ericsson.com>
+L: linux-i2c@vger.kernel.org
+S: Maintained
+F: drivers/i2c/muxes/pca9541.c
+
PCA9564/PCA9665 I2C BUS DRIVER
M: Wolfram Sang <w.sang@pengutronix.de>
L: linux-i2c@vger.kernel.org
@@ -4580,6 +4595,13 @@
S: Maintained
F: drivers/net/pcnet32.c
+PCRYPT PARALLEL CRYPTO ENGINE
+M: Steffen Klassert <steffen.klassert@secunet.com>
+L: linux-crypto@vger.kernel.org
+S: Maintained
+F: crypto/pcrypt.c
+F: include/crypto/pcrypt.h
+
PER-TASK DELAY ACCOUNTING
M: Balbir Singh <balbir@linux.vnet.ibm.com>
S: Maintained
diff --git a/arch/arm/mach-omap2/clock2420_data.c b/arch/arm/mach-omap2/clock2420_data.c
index 37d65d6..5f2066a 100644
--- a/arch/arm/mach-omap2/clock2420_data.c
+++ b/arch/arm/mach-omap2/clock2420_data.c
@@ -1838,7 +1838,7 @@
CLK(NULL, "des_ick", &des_ick, CK_242X),
CLK("omap-sham", "ick", &sha_ick, CK_242X),
CLK("omap_rng", "ick", &rng_ick, CK_242X),
- CLK(NULL, "aes_ick", &aes_ick, CK_242X),
+ CLK("omap-aes", "ick", &aes_ick, CK_242X),
CLK(NULL, "pka_ick", &pka_ick, CK_242X),
CLK(NULL, "usb_fck", &usb_fck, CK_242X),
CLK("musb_hdrc", "fck", &osc_ck, CK_242X),
diff --git a/arch/arm/mach-omap2/clock2430_data.c b/arch/arm/mach-omap2/clock2430_data.c
index b33118f..701a171 100644
--- a/arch/arm/mach-omap2/clock2430_data.c
+++ b/arch/arm/mach-omap2/clock2430_data.c
@@ -1926,7 +1926,7 @@
CLK(NULL, "des_ick", &des_ick, CK_243X),
CLK("omap-sham", "ick", &sha_ick, CK_243X),
CLK("omap_rng", "ick", &rng_ick, CK_243X),
- CLK(NULL, "aes_ick", &aes_ick, CK_243X),
+ CLK("omap-aes", "ick", &aes_ick, CK_243X),
CLK(NULL, "pka_ick", &pka_ick, CK_243X),
CLK(NULL, "usb_fck", &usb_fck, CK_243X),
CLK("musb_hdrc", "ick", &usbhs_ick, CK_243X),
diff --git a/arch/arm/mach-omap2/clock3xxx_data.c b/arch/arm/mach-omap2/clock3xxx_data.c
index dfdce2d..c73906d 100644
--- a/arch/arm/mach-omap2/clock3xxx_data.c
+++ b/arch/arm/mach-omap2/clock3xxx_data.c
@@ -3288,7 +3288,7 @@
CLK(NULL, "usbtll_ick", &usbtll_ick, CK_3430ES2 | CK_AM35XX),
CLK("mmci-omap-hs.2", "ick", &mmchs3_ick, CK_3430ES2 | CK_AM35XX),
CLK(NULL, "icr_ick", &icr_ick, CK_343X),
- CLK(NULL, "aes2_ick", &aes2_ick, CK_343X),
+ CLK("omap-aes", "ick", &aes2_ick, CK_343X),
CLK("omap-sham", "ick", &sha12_ick, CK_343X),
CLK(NULL, "des2_ick", &des2_ick, CK_343X),
CLK("mmci-omap-hs.1", "ick", &mmchs2_ick, CK_3XXX),
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 2dbb265..b27e7cb 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -498,6 +498,76 @@
static inline void omap_init_sham(void) { }
#endif
+#if defined(CONFIG_CRYPTO_DEV_OMAP_AES) || defined(CONFIG_CRYPTO_DEV_OMAP_AES_MODULE)
+
+#ifdef CONFIG_ARCH_OMAP24XX
+static struct resource omap2_aes_resources[] = {
+ {
+ .start = OMAP24XX_SEC_AES_BASE,
+ .end = OMAP24XX_SEC_AES_BASE + 0x4C,
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .start = OMAP24XX_DMA_AES_TX,
+ .flags = IORESOURCE_DMA,
+ },
+ {
+ .start = OMAP24XX_DMA_AES_RX,
+ .flags = IORESOURCE_DMA,
+ }
+};
+static int omap2_aes_resources_sz = ARRAY_SIZE(omap2_aes_resources);
+#else
+#define omap2_aes_resources NULL
+#define omap2_aes_resources_sz 0
+#endif
+
+#ifdef CONFIG_ARCH_OMAP34XX
+static struct resource omap3_aes_resources[] = {
+ {
+ .start = OMAP34XX_SEC_AES_BASE,
+ .end = OMAP34XX_SEC_AES_BASE + 0x4C,
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .start = OMAP34XX_DMA_AES2_TX,
+ .flags = IORESOURCE_DMA,
+ },
+ {
+ .start = OMAP34XX_DMA_AES2_RX,
+ .flags = IORESOURCE_DMA,
+ }
+};
+static int omap3_aes_resources_sz = ARRAY_SIZE(omap3_aes_resources);
+#else
+#define omap3_aes_resources NULL
+#define omap3_aes_resources_sz 0
+#endif
+
+static struct platform_device aes_device = {
+ .name = "omap-aes",
+ .id = -1,
+};
+
+static void omap_init_aes(void)
+{
+ if (cpu_is_omap24xx()) {
+ aes_device.resource = omap2_aes_resources;
+ aes_device.num_resources = omap2_aes_resources_sz;
+ } else if (cpu_is_omap34xx()) {
+ aes_device.resource = omap3_aes_resources;
+ aes_device.num_resources = omap3_aes_resources_sz;
+ } else {
+ pr_err("%s: platform not supported\n", __func__);
+ return;
+ }
+ platform_device_register(&aes_device);
+}
+
+#else
+static inline void omap_init_aes(void) { }
+#endif
+
/*-------------------------------------------------------------------------*/
#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
@@ -854,6 +924,7 @@
omap_hdq_init();
omap_init_sti();
omap_init_sham();
+ omap_init_aes();
omap_init_vout();
return 0;
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index ee541ce..c5f92a9 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -25,5 +25,6 @@
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
#define kvm_apic_present(x) (true)
+#define kvm_lapic_enabled(x) (true)
#endif
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 6c5547d..18ea696 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -86,5 +86,6 @@
#define KVM_INTERRUPT_SET -1U
#define KVM_INTERRUPT_UNSET -2U
+#define KVM_INTERRUPT_SET_LEVEL -3U
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index c5ea4cd..5b75046 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -58,6 +58,7 @@
#define BOOK3S_INTERRUPT_INST_STORAGE 0x400
#define BOOK3S_INTERRUPT_INST_SEGMENT 0x480
#define BOOK3S_INTERRUPT_EXTERNAL 0x500
+#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501
#define BOOK3S_INTERRUPT_ALIGNMENT 0x600
#define BOOK3S_INTERRUPT_PROGRAM 0x700
#define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800
@@ -84,7 +85,8 @@
#define BOOK3S_IRQPRIO_EXTERNAL 13
#define BOOK3S_IRQPRIO_DECREMENTER 14
#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 15
-#define BOOK3S_IRQPRIO_MAX 16
+#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 16
+#define BOOK3S_IRQPRIO_MAX 17
#define BOOK3S_HFLAG_DCBZ32 0x1
#define BOOK3S_HFLAG_SLB 0x2
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 8274a2d..d62e703 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -38,15 +38,6 @@
bool class : 1;
};
-struct kvmppc_sr {
- u32 raw;
- u32 vsid;
- bool Ks : 1;
- bool Kp : 1;
- bool nx : 1;
- bool valid : 1;
-};
-
struct kvmppc_bat {
u64 raw;
u32 bepi;
@@ -69,6 +60,13 @@
#define SID_MAP_NUM (1 << SID_MAP_BITS)
#define SID_MAP_MASK (SID_MAP_NUM - 1)
+#ifdef CONFIG_PPC_BOOK3S_64
+#define SID_CONTEXTS 1
+#else
+#define SID_CONTEXTS 128
+#define VSID_POOL_SIZE (SID_CONTEXTS * 16)
+#endif
+
struct kvmppc_vcpu_book3s {
struct kvm_vcpu vcpu;
struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
@@ -79,20 +77,22 @@
u64 vsid;
} slb_shadow[64];
u8 slb_shadow_max;
- struct kvmppc_sr sr[16];
struct kvmppc_bat ibat[8];
struct kvmppc_bat dbat[8];
u64 hid[6];
u64 gqr[8];
int slb_nr;
- u32 dsisr;
u64 sdr1;
u64 hior;
u64 msr_mask;
- u64 vsid_first;
u64 vsid_next;
+#ifdef CONFIG_PPC_BOOK3S_32
+ u32 vsid_pool[VSID_POOL_SIZE];
+#else
+ u64 vsid_first;
u64 vsid_max;
- int context_id;
+#endif
+ int context_id[SID_CONTEXTS];
ulong prog_flags; /* flags to inject when giving a 700 trap */
};
@@ -131,9 +131,10 @@
bool upper, u32 val);
extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
-extern u32 kvmppc_trampoline_lowmem;
-extern u32 kvmppc_trampoline_enter;
+extern ulong kvmppc_trampoline_lowmem;
+extern ulong kvmppc_trampoline_enter;
extern void kvmppc_rmcall(ulong srr0, ulong srr1);
extern void kvmppc_load_up_fpu(void);
extern void kvmppc_load_up_altivec(void);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index b0b23c0..bba3b9b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -25,6 +25,7 @@
#include <linux/interrupt.h>
#include <linux/types.h>
#include <linux/kvm_types.h>
+#include <linux/kvm_para.h>
#include <asm/kvm_asm.h>
#define KVM_MAX_VCPUS 1
@@ -41,12 +42,17 @@
#define HPTEG_CACHE_NUM (1 << 15)
#define HPTEG_HASH_BITS_PTE 13
+#define HPTEG_HASH_BITS_PTE_LONG 12
#define HPTEG_HASH_BITS_VPTE 13
#define HPTEG_HASH_BITS_VPTE_LONG 5
#define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE)
+#define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG)
#define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE)
#define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG)
+/* Physical Address Mask - allowed range of real mode RAM access */
+#define KVM_PAM 0x0fffffffffffffffULL
+
struct kvm;
struct kvm_run;
struct kvm_vcpu;
@@ -159,8 +165,10 @@
struct hpte_cache {
struct hlist_node list_pte;
+ struct hlist_node list_pte_long;
struct hlist_node list_vpte;
struct hlist_node list_vpte_long;
+ struct rcu_head rcu_head;
u64 host_va;
u64 pfn;
ulong slot;
@@ -210,28 +218,20 @@
u32 cr;
#endif
- ulong msr;
#ifdef CONFIG_PPC_BOOK3S
ulong shadow_msr;
ulong hflags;
ulong guest_owned_ext;
#endif
u32 mmucr;
- ulong sprg0;
- ulong sprg1;
- ulong sprg2;
- ulong sprg3;
ulong sprg4;
ulong sprg5;
ulong sprg6;
ulong sprg7;
- ulong srr0;
- ulong srr1;
ulong csrr0;
ulong csrr1;
ulong dsrr0;
ulong dsrr1;
- ulong dear;
ulong esr;
u32 dec;
u32 decar;
@@ -290,12 +290,17 @@
struct tasklet_struct tasklet;
u64 dec_jiffies;
unsigned long pending_exceptions;
+ struct kvm_vcpu_arch_shared *shared;
+ unsigned long magic_page_pa; /* phys addr to map the magic page to */
+ unsigned long magic_page_ea; /* effect. addr to map the magic page to */
#ifdef CONFIG_PPC_BOOK3S
struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE];
+ struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG];
int hpte_cache_count;
+ spinlock_t mmu_lock;
#endif
};
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 2d48f6a..50533f9 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -20,16 +20,153 @@
#ifndef __POWERPC_KVM_PARA_H__
#define __POWERPC_KVM_PARA_H__
+#include <linux/types.h>
+
+struct kvm_vcpu_arch_shared {
+ __u64 scratch1;
+ __u64 scratch2;
+ __u64 scratch3;
+ __u64 critical; /* Guest may not get interrupts if == r1 */
+ __u64 sprg0;
+ __u64 sprg1;
+ __u64 sprg2;
+ __u64 sprg3;
+ __u64 srr0;
+ __u64 srr1;
+ __u64 dar;
+ __u64 msr;
+ __u32 dsisr;
+ __u32 int_pending; /* Tells the guest if we have an interrupt */
+ __u32 sr[16];
+};
+
+#define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */
+#define HC_VENDOR_KVM (42 << 16)
+#define HC_EV_SUCCESS 0
+#define HC_EV_UNIMPLEMENTED 12
+
+#define KVM_FEATURE_MAGIC_PAGE 1
+
+#define KVM_MAGIC_FEAT_SR (1 << 0)
+
#ifdef __KERNEL__
+#ifdef CONFIG_KVM_GUEST
+
+#include <linux/of.h>
+
+static inline int kvm_para_available(void)
+{
+ struct device_node *hyper_node;
+
+ hyper_node = of_find_node_by_path("/hypervisor");
+ if (!hyper_node)
+ return 0;
+
+ if (!of_device_is_compatible(hyper_node, "linux,kvm"))
+ return 0;
+
+ return 1;
+}
+
+extern unsigned long kvm_hypercall(unsigned long *in,
+ unsigned long *out,
+ unsigned long nr);
+
+#else
+
static inline int kvm_para_available(void)
{
return 0;
}
+static unsigned long kvm_hypercall(unsigned long *in,
+ unsigned long *out,
+ unsigned long nr)
+{
+ return HC_EV_UNIMPLEMENTED;
+}
+
+#endif
+
+static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2)
+{
+ unsigned long in[8];
+ unsigned long out[8];
+ unsigned long r;
+
+ r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ *r2 = out[0];
+
+ return r;
+}
+
+static inline long kvm_hypercall0(unsigned int nr)
+{
+ unsigned long in[8];
+ unsigned long out[8];
+
+ return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+}
+
+static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
+{
+ unsigned long in[8];
+ unsigned long out[8];
+
+ in[0] = p1;
+ return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+}
+
+static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
+ unsigned long p2)
+{
+ unsigned long in[8];
+ unsigned long out[8];
+
+ in[0] = p1;
+ in[1] = p2;
+ return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+}
+
+static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
+ unsigned long p2, unsigned long p3)
+{
+ unsigned long in[8];
+ unsigned long out[8];
+
+ in[0] = p1;
+ in[1] = p2;
+ in[2] = p3;
+ return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+}
+
+static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4)
+{
+ unsigned long in[8];
+ unsigned long out[8];
+
+ in[0] = p1;
+ in[1] = p2;
+ in[2] = p3;
+ in[3] = p4;
+ return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+}
+
+
static inline unsigned int kvm_arch_para_features(void)
{
- return 0;
+ unsigned long r;
+
+ if (!kvm_para_available())
+ return 0;
+
+ if(kvm_hypercall0_1(KVM_HC_FEATURES, &r))
+ return 0;
+
+ return r;
}
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 18d139e..ecb3bc7 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -107,6 +107,7 @@
extern void kvmppc_booke_exit(void);
extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
+extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
/*
* Cuts out inst bits with ordering according to spec.
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 4ed076a..36c30f3 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -129,6 +129,8 @@
obj-y += ppc_save_regs.o
endif
+obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
+
# Disable GCOV in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
GCOV_PROFILE_ftrace.o := n
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c3e0194..bd0df2e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -48,11 +48,11 @@
#ifdef CONFIG_PPC_ISERIES
#include <asm/iseries/alpaca.h>
#endif
-#ifdef CONFIG_KVM
+#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST)
#include <linux/kvm_host.h>
-#ifndef CONFIG_BOOKE
-#include <asm/kvm_book3s.h>
#endif
+#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)
+#include <asm/kvm_book3s.h>
#endif
#ifdef CONFIG_PPC32
@@ -396,12 +396,13 @@
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
- DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
+ DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
+ DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
/* book3s */
#ifdef CONFIG_PPC_BOOK3S
@@ -466,6 +467,22 @@
DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
#endif /* CONFIG_PPC_BOOK3S */
#endif
+
+#ifdef CONFIG_KVM_GUEST
+ DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared,
+ scratch1));
+ DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared,
+ scratch2));
+ DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared,
+ scratch3));
+ DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared,
+ int_pending));
+ DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
+ DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared,
+ critical));
+ DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr));
+#endif
+
#ifdef CONFIG_44x
DEFINE(PGD_T_LOG2, PGD_T_LOG2);
DEFINE(PTE_T_LOG2, PTE_T_LOG2);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 39b0c48..9f8b01d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -299,6 +299,12 @@
b . /* prevent spec. execution */
#endif /* __DISABLED__ */
+/* KVM's trampoline code needs to be close to the interrupt handlers */
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#include "../kvm/book3s_rmhandlers.S"
+#endif
+
.align 7
.globl __end_interrupts
__end_interrupts:
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index c571cd3..f0dd577 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -166,12 +166,6 @@
#include "exceptions-64s.S"
#endif
-/* KVM trampoline code needs to be close to the interrupt handlers */
-
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#include "../kvm/book3s_rmhandlers.S"
-#endif
-
_GLOBAL(generic_secondary_thread_init)
mr r24,r3
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
new file mode 100644
index 0000000..428d0e5
--- /dev/null
+++ b/arch/powerpc/kernel/kvm.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/init.h>
+#include <linux/kvm_para.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+
+#include <asm/reg.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
+#include <asm/disassemble.h>
+
+#define KVM_MAGIC_PAGE (-4096L)
+#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x)
+
+#define KVM_INST_LWZ 0x80000000
+#define KVM_INST_STW 0x90000000
+#define KVM_INST_LD 0xe8000000
+#define KVM_INST_STD 0xf8000000
+#define KVM_INST_NOP 0x60000000
+#define KVM_INST_B 0x48000000
+#define KVM_INST_B_MASK 0x03ffffff
+#define KVM_INST_B_MAX 0x01ffffff
+
+#define KVM_MASK_RT 0x03e00000
+#define KVM_RT_30 0x03c00000
+#define KVM_MASK_RB 0x0000f800
+#define KVM_INST_MFMSR 0x7c0000a6
+#define KVM_INST_MFSPR_SPRG0 0x7c1042a6
+#define KVM_INST_MFSPR_SPRG1 0x7c1142a6
+#define KVM_INST_MFSPR_SPRG2 0x7c1242a6
+#define KVM_INST_MFSPR_SPRG3 0x7c1342a6
+#define KVM_INST_MFSPR_SRR0 0x7c1a02a6
+#define KVM_INST_MFSPR_SRR1 0x7c1b02a6
+#define KVM_INST_MFSPR_DAR 0x7c1302a6
+#define KVM_INST_MFSPR_DSISR 0x7c1202a6
+
+#define KVM_INST_MTSPR_SPRG0 0x7c1043a6
+#define KVM_INST_MTSPR_SPRG1 0x7c1143a6
+#define KVM_INST_MTSPR_SPRG2 0x7c1243a6
+#define KVM_INST_MTSPR_SPRG3 0x7c1343a6
+#define KVM_INST_MTSPR_SRR0 0x7c1a03a6
+#define KVM_INST_MTSPR_SRR1 0x7c1b03a6
+#define KVM_INST_MTSPR_DAR 0x7c1303a6
+#define KVM_INST_MTSPR_DSISR 0x7c1203a6
+
+#define KVM_INST_TLBSYNC 0x7c00046c
+#define KVM_INST_MTMSRD_L0 0x7c000164
+#define KVM_INST_MTMSRD_L1 0x7c010164
+#define KVM_INST_MTMSR 0x7c000124
+
+#define KVM_INST_WRTEEI_0 0x7c000146
+#define KVM_INST_WRTEEI_1 0x7c008146
+
+#define KVM_INST_MTSRIN 0x7c0001e4
+
+static bool kvm_patching_worked = true;
+static char kvm_tmp[1024 * 1024];
+static int kvm_tmp_index;
+
+static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
+{
+ *inst = new_inst;
+ flush_icache_range((ulong)inst, (ulong)inst + 4);
+}
+
+static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
+{
+#ifdef CONFIG_64BIT
+ kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
+#else
+ kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000fffc));
+#endif
+}
+
+static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
+{
+#ifdef CONFIG_64BIT
+ kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
+#else
+ kvm_patch_ins(inst, KVM_INST_LWZ | rt | ((addr + 4) & 0x0000fffc));
+#endif
+}
+
+static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
+{
+ kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));
+}
+
+static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
+{
+#ifdef CONFIG_64BIT
+ kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc));
+#else
+ kvm_patch_ins(inst, KVM_INST_STW | rt | ((addr + 4) & 0x0000fffc));
+#endif
+}
+
+static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
+{
+ kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));
+}
+
+static void kvm_patch_ins_nop(u32 *inst)
+{
+ kvm_patch_ins(inst, KVM_INST_NOP);
+}
+
+static void kvm_patch_ins_b(u32 *inst, int addr)
+{
+#ifdef CONFIG_RELOCATABLE
+ /* On relocatable kernels interrupts handlers and our code
+ can be in different regions, so we don't patch them */
+
+ extern u32 __end_interrupts;
+ if ((ulong)inst < (ulong)&__end_interrupts)
+ return;
+#endif
+
+ kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
+}
+
+static u32 *kvm_alloc(int len)
+{
+ u32 *p;
+
+ if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
+ printk(KERN_ERR "KVM: No more space (%d + %d)\n",
+ kvm_tmp_index, len);
+ kvm_patching_worked = false;
+ return NULL;
+ }
+
+ p = (void*)&kvm_tmp[kvm_tmp_index];
+ kvm_tmp_index += len;
+
+ return p;
+}
+
+extern u32 kvm_emulate_mtmsrd_branch_offs;
+extern u32 kvm_emulate_mtmsrd_reg_offs;
+extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
+extern u32 kvm_emulate_mtmsrd_len;
+extern u32 kvm_emulate_mtmsrd[];
+
+static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_mtmsrd_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsrd_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_mtmsrd, kvm_emulate_mtmsrd_len * 4);
+ p[kvm_emulate_mtmsrd_branch_offs] |= distance_end & KVM_INST_B_MASK;
+ switch (get_rt(rt)) {
+ case 30:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs],
+ magic_var(scratch2), KVM_RT_30);
+ break;
+ case 31:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs],
+ magic_var(scratch1), KVM_RT_30);
+ break;
+ default:
+ p[kvm_emulate_mtmsrd_reg_offs] |= rt;
+ break;
+ }
+
+ p[kvm_emulate_mtmsrd_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsrd_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+extern u32 kvm_emulate_mtmsr_branch_offs;
+extern u32 kvm_emulate_mtmsr_reg1_offs;
+extern u32 kvm_emulate_mtmsr_reg2_offs;
+extern u32 kvm_emulate_mtmsr_orig_ins_offs;
+extern u32 kvm_emulate_mtmsr_len;
+extern u32 kvm_emulate_mtmsr[];
+
+static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_mtmsr_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsr_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_mtmsr, kvm_emulate_mtmsr_len * 4);
+ p[kvm_emulate_mtmsr_branch_offs] |= distance_end & KVM_INST_B_MASK;
+
+ /* Make clobbered registers work too */
+ switch (get_rt(rt)) {
+ case 30:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs],
+ magic_var(scratch2), KVM_RT_30);
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs],
+ magic_var(scratch2), KVM_RT_30);
+ break;
+ case 31:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs],
+ magic_var(scratch1), KVM_RT_30);
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs],
+ magic_var(scratch1), KVM_RT_30);
+ break;
+ default:
+ p[kvm_emulate_mtmsr_reg1_offs] |= rt;
+ p[kvm_emulate_mtmsr_reg2_offs] |= rt;
+ break;
+ }
+
+ p[kvm_emulate_mtmsr_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsr_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+#ifdef CONFIG_BOOKE
+
+extern u32 kvm_emulate_wrteei_branch_offs;
+extern u32 kvm_emulate_wrteei_ee_offs;
+extern u32 kvm_emulate_wrteei_len;
+extern u32 kvm_emulate_wrteei[];
+
+static void kvm_patch_ins_wrteei(u32 *inst)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_wrteei_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_wrteei, kvm_emulate_wrteei_len * 4);
+ p[kvm_emulate_wrteei_branch_offs] |= distance_end & KVM_INST_B_MASK;
+ p[kvm_emulate_wrteei_ee_offs] |= (*inst & MSR_EE);
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+
+extern u32 kvm_emulate_mtsrin_branch_offs;
+extern u32 kvm_emulate_mtsrin_reg1_offs;
+extern u32 kvm_emulate_mtsrin_reg2_offs;
+extern u32 kvm_emulate_mtsrin_orig_ins_offs;
+extern u32 kvm_emulate_mtsrin_len;
+extern u32 kvm_emulate_mtsrin[];
+
+static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_mtsrin_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_mtsrin_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_mtsrin, kvm_emulate_mtsrin_len * 4);
+ p[kvm_emulate_mtsrin_branch_offs] |= distance_end & KVM_INST_B_MASK;
+ p[kvm_emulate_mtsrin_reg1_offs] |= (rb << 10);
+ p[kvm_emulate_mtsrin_reg2_offs] |= rt;
+ p[kvm_emulate_mtsrin_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtsrin_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+#endif
+
+static void kvm_map_magic_page(void *data)
+{
+ u32 *features = data;
+
+ ulong in[8];
+ ulong out[8];
+
+ in[0] = KVM_MAGIC_PAGE;
+ in[1] = KVM_MAGIC_PAGE;
+
+ kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE);
+
+ *features = out[0];
+}
+
+static void kvm_check_ins(u32 *inst, u32 features)
+{
+ u32 _inst = *inst;
+ u32 inst_no_rt = _inst & ~KVM_MASK_RT;
+ u32 inst_rt = _inst & KVM_MASK_RT;
+
+ switch (inst_no_rt) {
+ /* Loads */
+ case KVM_INST_MFMSR:
+ kvm_patch_ins_ld(inst, magic_var(msr), inst_rt);
+ break;
+ case KVM_INST_MFSPR_SPRG0:
+ kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt);
+ break;
+ case KVM_INST_MFSPR_SPRG1:
+ kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt);
+ break;
+ case KVM_INST_MFSPR_SPRG2:
+ kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt);
+ break;
+ case KVM_INST_MFSPR_SPRG3:
+ kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt);
+ break;
+ case KVM_INST_MFSPR_SRR0:
+ kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt);
+ break;
+ case KVM_INST_MFSPR_SRR1:
+ kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt);
+ break;
+ case KVM_INST_MFSPR_DAR:
+ kvm_patch_ins_ld(inst, magic_var(dar), inst_rt);
+ break;
+ case KVM_INST_MFSPR_DSISR:
+ kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt);
+ break;
+
+ /* Stores */
+ case KVM_INST_MTSPR_SPRG0:
+ kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt);
+ break;
+ case KVM_INST_MTSPR_SPRG1:
+ kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt);
+ break;
+ case KVM_INST_MTSPR_SPRG2:
+ kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt);
+ break;
+ case KVM_INST_MTSPR_SPRG3:
+ kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt);
+ break;
+ case KVM_INST_MTSPR_SRR0:
+ kvm_patch_ins_std(inst, magic_var(srr0), inst_rt);
+ break;
+ case KVM_INST_MTSPR_SRR1:
+ kvm_patch_ins_std(inst, magic_var(srr1), inst_rt);
+ break;
+ case KVM_INST_MTSPR_DAR:
+ kvm_patch_ins_std(inst, magic_var(dar), inst_rt);
+ break;
+ case KVM_INST_MTSPR_DSISR:
+ kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt);
+ break;
+
+ /* Nops */
+ case KVM_INST_TLBSYNC:
+ kvm_patch_ins_nop(inst);
+ break;
+
+ /* Rewrites */
+ case KVM_INST_MTMSRD_L1:
+ kvm_patch_ins_mtmsrd(inst, inst_rt);
+ break;
+ case KVM_INST_MTMSR:
+ case KVM_INST_MTMSRD_L0:
+ kvm_patch_ins_mtmsr(inst, inst_rt);
+ break;
+ }
+
+ switch (inst_no_rt & ~KVM_MASK_RB) {
+#ifdef CONFIG_PPC_BOOK3S_32
+ case KVM_INST_MTSRIN:
+ if (features & KVM_MAGIC_FEAT_SR) {
+ u32 inst_rb = _inst & KVM_MASK_RB;
+ kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb);
+ }
+ break;
+ break;
+#endif
+ }
+
+ switch (_inst) {
+#ifdef CONFIG_BOOKE
+ case KVM_INST_WRTEEI_0:
+ case KVM_INST_WRTEEI_1:
+ kvm_patch_ins_wrteei(inst);
+ break;
+#endif
+ }
+}
+
+static void kvm_use_magic_page(void)
+{
+ u32 *p;
+ u32 *start, *end;
+ u32 tmp;
+ u32 features;
+
+ /* Tell the host to map the magic page to -4096 on all CPUs */
+ on_each_cpu(kvm_map_magic_page, &features, 1);
+
+ /* Quick self-test to see if the mapping works */
+ if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Now loop through all code and find instructions */
+ start = (void*)_stext;
+ end = (void*)_etext;
+
+ for (p = start; p < end; p++)
+ kvm_check_ins(p, features);
+
+ printk(KERN_INFO "KVM: Live patching for a fast VM %s\n",
+ kvm_patching_worked ? "worked" : "failed");
+}
+
+unsigned long kvm_hypercall(unsigned long *in,
+ unsigned long *out,
+ unsigned long nr)
+{
+ unsigned long register r0 asm("r0");
+ unsigned long register r3 asm("r3") = in[0];
+ unsigned long register r4 asm("r4") = in[1];
+ unsigned long register r5 asm("r5") = in[2];
+ unsigned long register r6 asm("r6") = in[3];
+ unsigned long register r7 asm("r7") = in[4];
+ unsigned long register r8 asm("r8") = in[5];
+ unsigned long register r9 asm("r9") = in[6];
+ unsigned long register r10 asm("r10") = in[7];
+ unsigned long register r11 asm("r11") = nr;
+ unsigned long register r12 asm("r12");
+
+ asm volatile("bl kvm_hypercall_start"
+ : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
+ "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
+ "=r"(r12)
+ : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8),
+ "r"(r9), "r"(r10), "r"(r11)
+ : "memory", "cc", "xer", "ctr", "lr");
+
+ out[0] = r4;
+ out[1] = r5;
+ out[2] = r6;
+ out[3] = r7;
+ out[4] = r8;
+ out[5] = r9;
+ out[6] = r10;
+ out[7] = r11;
+
+ return r3;
+}
+EXPORT_SYMBOL_GPL(kvm_hypercall);
+
+static int kvm_para_setup(void)
+{
+ extern u32 kvm_hypercall_start;
+ struct device_node *hyper_node;
+ u32 *insts;
+ int len, i;
+
+ hyper_node = of_find_node_by_path("/hypervisor");
+ if (!hyper_node)
+ return -1;
+
+ insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len);
+ if (len % 4)
+ return -1;
+ if (len > (4 * 4))
+ return -1;
+
+ for (i = 0; i < (len / 4); i++)
+ kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]);
+
+ return 0;
+}
+
+static __init void kvm_free_tmp(void)
+{
+ unsigned long start, end;
+
+ start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK;
+ end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK;
+
+ /* Free the tmp space we don't need */
+ for (; start < end; start += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(start));
+ init_page_count(virt_to_page(start));
+ free_page(start);
+ totalram_pages++;
+ }
+}
+
+static int __init kvm_guest_init(void)
+{
+ if (!kvm_para_available())
+ goto free_tmp;
+
+ if (kvm_para_setup())
+ goto free_tmp;
+
+ if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
+ kvm_use_magic_page();
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Enable napping */
+ powersave_nap = 1;
+#endif
+
+free_tmp:
+ kvm_free_tmp();
+
+ return 0;
+}
+
+postcore_initcall(kvm_guest_init);
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
new file mode 100644
index 0000000..f2b1b25
--- /dev/null
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -0,0 +1,302 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2010
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+
+/* Hypercall entry point. Will be patched with device tree instructions. */
+
+.global kvm_hypercall_start
+kvm_hypercall_start:
+ li r3, -1
+ nop
+ nop
+ nop
+ blr
+
+#define KVM_MAGIC_PAGE (-4096)
+
+#ifdef CONFIG_64BIT
+#define LL64(reg, offs, reg2) ld reg, (offs)(reg2)
+#define STL64(reg, offs, reg2) std reg, (offs)(reg2)
+#else
+#define LL64(reg, offs, reg2) lwz reg, (offs + 4)(reg2)
+#define STL64(reg, offs, reg2) stw reg, (offs + 4)(reg2)
+#endif
+
+#define SCRATCH_SAVE \
+ /* Enable critical section. We are critical if \
+ shared->critical == r1 */ \
+ STL64(r1, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); \
+ \
+ /* Save state */ \
+ PPC_STL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \
+ PPC_STL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \
+ mfcr r31; \
+ stw r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0);
+
+#define SCRATCH_RESTORE \
+ /* Restore state */ \
+ PPC_LL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \
+ lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); \
+ mtcr r30; \
+ PPC_LL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \
+ \
+ /* Disable critical section. We are critical if \
+ shared->critical == r1 and r2 is always != r1 */ \
+ STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0);
+
+.global kvm_emulate_mtmsrd
+kvm_emulate_mtmsrd:
+
+ SCRATCH_SAVE
+
+ /* Put MSR & ~(MSR_EE|MSR_RI) in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+ lis r30, (~(MSR_EE | MSR_RI))@h
+ ori r30, r30, (~(MSR_EE | MSR_RI))@l
+ and r31, r31, r30
+
+ /* OR the register's (MSR_EE|MSR_RI) on MSR */
+kvm_emulate_mtmsrd_reg:
+ ori r30, r0, 0
+ andi. r30, r30, (MSR_EE|MSR_RI)
+ or r31, r31, r30
+
+ /* Put MSR back into magic page */
+ STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Check if we have to fetch an interrupt */
+ lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+ cmpwi r31, 0
+ beq+ no_check
+
+ /* Check if we may trigger an interrupt */
+ andi. r30, r30, MSR_EE
+ beq no_check
+
+ SCRATCH_RESTORE
+
+ /* Nag hypervisor */
+kvm_emulate_mtmsrd_orig_ins:
+ tlbsync
+
+ b kvm_emulate_mtmsrd_branch
+
+no_check:
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_mtmsrd_branch:
+ b .
+kvm_emulate_mtmsrd_end:
+
+.global kvm_emulate_mtmsrd_branch_offs
+kvm_emulate_mtmsrd_branch_offs:
+ .long (kvm_emulate_mtmsrd_branch - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_reg_offs
+kvm_emulate_mtmsrd_reg_offs:
+ .long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_orig_ins_offs
+kvm_emulate_mtmsrd_orig_ins_offs:
+ .long (kvm_emulate_mtmsrd_orig_ins - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_len
+kvm_emulate_mtmsrd_len:
+ .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4
+
+
+#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI)
+#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS
+
+.global kvm_emulate_mtmsr
+kvm_emulate_mtmsr:
+
+ SCRATCH_SAVE
+
+ /* Fetch old MSR in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Find the changed bits between old and new MSR */
+kvm_emulate_mtmsr_reg1:
+ ori r30, r0, 0
+ xor r31, r30, r31
+
+ /* Check if we need to really do mtmsr */
+ LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS)
+ and. r31, r31, r30
+
+ /* No critical bits changed? Maybe we can stay in the guest. */
+ beq maybe_stay_in_guest
+
+do_mtmsr:
+
+ SCRATCH_RESTORE
+
+ /* Just fire off the mtmsr if it's critical */
+kvm_emulate_mtmsr_orig_ins:
+ mtmsr r0
+
+ b kvm_emulate_mtmsr_branch
+
+maybe_stay_in_guest:
+
+ /* Get the target register in r30 */
+kvm_emulate_mtmsr_reg2:
+ ori r30, r0, 0
+
+ /* Check if we have to fetch an interrupt */
+ lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+ cmpwi r31, 0
+ beq+ no_mtmsr
+
+ /* Check if we may trigger an interrupt */
+ andi. r31, r30, MSR_EE
+ beq no_mtmsr
+
+ b do_mtmsr
+
+no_mtmsr:
+
+ /* Put MSR into magic page because we don't call mtmsr */
+ STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_mtmsr_branch:
+ b .
+kvm_emulate_mtmsr_end:
+
+.global kvm_emulate_mtmsr_branch_offs
+kvm_emulate_mtmsr_branch_offs:
+ .long (kvm_emulate_mtmsr_branch - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_reg1_offs
+kvm_emulate_mtmsr_reg1_offs:
+ .long (kvm_emulate_mtmsr_reg1 - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_reg2_offs
+kvm_emulate_mtmsr_reg2_offs:
+ .long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_orig_ins_offs
+kvm_emulate_mtmsr_orig_ins_offs:
+ .long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_len
+kvm_emulate_mtmsr_len:
+ .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
+
+
+
+.global kvm_emulate_wrteei
+kvm_emulate_wrteei:
+
+ SCRATCH_SAVE
+
+ /* Fetch old MSR in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Remove MSR_EE from old MSR */
+ li r30, 0
+ ori r30, r30, MSR_EE
+ andc r31, r31, r30
+
+ /* OR new MSR_EE onto the old MSR */
+kvm_emulate_wrteei_ee:
+ ori r31, r31, 0
+
+ /* Write new MSR value back */
+ STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_wrteei_branch:
+ b .
+kvm_emulate_wrteei_end:
+
+.global kvm_emulate_wrteei_branch_offs
+kvm_emulate_wrteei_branch_offs:
+ .long (kvm_emulate_wrteei_branch - kvm_emulate_wrteei) / 4
+
+.global kvm_emulate_wrteei_ee_offs
+kvm_emulate_wrteei_ee_offs:
+ .long (kvm_emulate_wrteei_ee - kvm_emulate_wrteei) / 4
+
+.global kvm_emulate_wrteei_len
+kvm_emulate_wrteei_len:
+ .long (kvm_emulate_wrteei_end - kvm_emulate_wrteei) / 4
+
+
+.global kvm_emulate_mtsrin
+kvm_emulate_mtsrin:
+
+ SCRATCH_SAVE
+
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+ andi. r31, r31, MSR_DR | MSR_IR
+ beq kvm_emulate_mtsrin_reg1
+
+ SCRATCH_RESTORE
+
+kvm_emulate_mtsrin_orig_ins:
+ nop
+ b kvm_emulate_mtsrin_branch
+
+kvm_emulate_mtsrin_reg1:
+ /* rX >> 26 */
+ rlwinm r30,r0,6,26,29
+
+kvm_emulate_mtsrin_reg2:
+ stw r0, (KVM_MAGIC_PAGE + KVM_MAGIC_SR)(r30)
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_mtsrin_branch:
+ b .
+kvm_emulate_mtsrin_end:
+
+.global kvm_emulate_mtsrin_branch_offs
+kvm_emulate_mtsrin_branch_offs:
+ .long (kvm_emulate_mtsrin_branch - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_reg1_offs
+kvm_emulate_mtsrin_reg1_offs:
+ .long (kvm_emulate_mtsrin_reg1 - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_reg2_offs
+kvm_emulate_mtsrin_reg2_offs:
+ .long (kvm_emulate_mtsrin_reg2 - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_orig_ins_offs
+kvm_emulate_mtsrin_orig_ins_offs:
+ .long (kvm_emulate_mtsrin_orig_ins - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_len
+kvm_emulate_mtsrin_len:
+ .long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 73c0a3f..74d0e74 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -43,7 +43,7 @@
{
int r;
- if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
+ if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0)
r = 0;
else
r = -ENOTSUPP;
@@ -72,6 +72,7 @@
/* Since the guest can directly access the timebase, it must know the
* real timebase frequency. Accordingly, it must see the state of
* CCR1[TCS]. */
+ /* XXX CCR1 doesn't exist on all 440 SoCs. */
vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
@@ -123,8 +124,14 @@
if (err)
goto free_vcpu;
+ vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+ if (!vcpu->arch.shared)
+ goto uninit_vcpu;
+
return vcpu;
+uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
free_vcpu:
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
out:
@@ -135,6 +142,7 @@
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ free_page((unsigned long)vcpu->arch.shared);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
}
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 9b9b5cd..5f3cff8 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -47,6 +47,7 @@
#ifdef DEBUG
void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
struct kvmppc_44x_tlbe *tlbe;
int i;
@@ -221,14 +222,14 @@
int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
- unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
- unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
@@ -354,7 +355,7 @@
stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
- vcpu->arch.msr & MSR_PR);
+ vcpu->arch.shared->msr & MSR_PR);
stlbe.tid = !(asid & 0xff);
/* Keep track of the reference so we can properly release it later. */
@@ -423,7 +424,7 @@
/* Does it match current guest AS? */
/* XXX what about IS != DS? */
- if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+ if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
return 0;
gpa = get_tlb_raddr(tlbe);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index a3cef30..e316847 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -17,6 +17,7 @@
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
+#include "trace.h"
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -35,7 +36,6 @@
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/* #define EXIT_DEBUG */
-/* #define EXIT_DEBUG_SIMPLE */
/* #define DEBUG_EXT */
static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
@@ -105,65 +105,71 @@
kvmppc_giveup_ext(vcpu, MSR_VSX);
}
-#if defined(EXIT_DEBUG)
-static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
-{
- u64 jd = mftb() - vcpu->arch.dec_jiffies;
- return vcpu->arch.dec - jd;
-}
-#endif
-
static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
{
- vcpu->arch.shadow_msr = vcpu->arch.msr;
+ ulong smsr = vcpu->arch.shared->msr;
+
/* Guest MSR values */
- vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
- MSR_BE | MSR_DE;
+ smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
/* Process MSR values */
- vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
- MSR_EE;
+ smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
/* External providers the guest reserved */
- vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
+ smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext);
/* 64-bit Process MSR values */
#ifdef CONFIG_PPC_BOOK3S_64
- vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
+ smsr |= MSR_ISF | MSR_HV;
#endif
+ vcpu->arch.shadow_msr = smsr;
}
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
- ulong old_msr = vcpu->arch.msr;
+ ulong old_msr = vcpu->arch.shared->msr;
#ifdef EXIT_DEBUG
printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
#endif
msr &= to_book3s(vcpu)->msr_mask;
- vcpu->arch.msr = msr;
+ vcpu->arch.shared->msr = msr;
kvmppc_recalc_shadow_msr(vcpu);
- if (msr & (MSR_WE|MSR_POW)) {
+ if (msr & MSR_POW) {
if (!vcpu->arch.pending_exceptions) {
kvm_vcpu_block(vcpu);
vcpu->stat.halt_wakeup++;
+
+ /* Unset POW bit after we woke up */
+ msr &= ~MSR_POW;
+ vcpu->arch.shared->msr = msr;
}
}
- if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) !=
+ if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
(old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
kvmppc_mmu_flush_segments(vcpu);
kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+
+ /* Preload magic page segment when in kernel mode */
+ if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
+ struct kvm_vcpu_arch *a = &vcpu->arch;
+
+ if (msr & MSR_DR)
+ kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
+ else
+ kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
+ }
}
/* Preload FPU if it's enabled */
- if (vcpu->arch.msr & MSR_FP)
+ if (vcpu->arch.shared->msr & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
}
void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
{
- vcpu->arch.srr0 = kvmppc_get_pc(vcpu);
- vcpu->arch.srr1 = vcpu->arch.msr | flags;
+ vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
+ vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags;
kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec);
vcpu->arch.mmu.reset_msr(vcpu);
}
@@ -180,6 +186,7 @@
case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
+ case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break;
case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break;
case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break;
case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break;
@@ -199,6 +206,9 @@
{
clear_bit(kvmppc_book3s_vec2irqprio(vec),
&vcpu->arch.pending_exceptions);
+
+ if (!vcpu->arch.pending_exceptions)
+ vcpu->arch.shared->int_pending = 0;
}
void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
@@ -237,13 +247,19 @@
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
- kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+ unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL;
+
+ if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
+ vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
+
+ kvmppc_book3s_queue_irqprio(vcpu, vec);
}
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+ kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
}
int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
@@ -251,14 +267,29 @@
int deliver = 1;
int vec = 0;
ulong flags = 0ULL;
+ ulong crit_raw = vcpu->arch.shared->critical;
+ ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
+ bool crit;
+
+ /* Truncate crit indicators in 32 bit mode */
+ if (!(vcpu->arch.shared->msr & MSR_SF)) {
+ crit_raw &= 0xffffffff;
+ crit_r1 &= 0xffffffff;
+ }
+
+ /* Critical section when crit == r1 */
+ crit = (crit_raw == crit_r1);
+ /* ... and we're in supervisor mode */
+ crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
switch (priority) {
case BOOK3S_IRQPRIO_DECREMENTER:
- deliver = vcpu->arch.msr & MSR_EE;
+ deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_DECREMENTER;
break;
case BOOK3S_IRQPRIO_EXTERNAL:
- deliver = vcpu->arch.msr & MSR_EE;
+ case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
+ deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_EXTERNAL;
break;
case BOOK3S_IRQPRIO_SYSTEM_RESET:
@@ -320,9 +351,27 @@
return deliver;
}
+/*
+ * This function determines if an irqprio should be cleared once issued.
+ */
+static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+ switch (priority) {
+ case BOOK3S_IRQPRIO_DECREMENTER:
+ /* DEC interrupts get cleared by mtdec */
+ return false;
+ case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
+ /* External interrupts get cleared by userspace */
+ return false;
+ }
+
+ return true;
+}
+
void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
+ unsigned long old_pending = vcpu->arch.pending_exceptions;
unsigned int priority;
#ifdef EXIT_DEBUG
@@ -332,8 +381,7 @@
priority = __ffs(*pending);
while (priority < BOOK3S_IRQPRIO_MAX) {
if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
- (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
- /* DEC interrupts get cleared by mtdec */
+ clear_irqprio(vcpu, priority)) {
clear_bit(priority, &vcpu->arch.pending_exceptions);
break;
}
@@ -342,6 +390,12 @@
BITS_PER_BYTE * sizeof(*pending),
priority + 1);
}
+
+ /* Tell the guest about our interrupt status */
+ if (*pending)
+ vcpu->arch.shared->int_pending = 1;
+ else if (old_pending)
+ vcpu->arch.shared->int_pending = 0;
}
void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
@@ -398,6 +452,25 @@
}
}
+pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ ulong mp_pa = vcpu->arch.magic_page_pa;
+
+ /* Magic page override */
+ if (unlikely(mp_pa) &&
+ unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) ==
+ ((mp_pa & PAGE_MASK) & KVM_PAM))) {
+ ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
+ pfn_t pfn;
+
+ pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
+ get_page(pfn_to_page(pfn));
+ return pfn;
+ }
+
+ return gfn_to_pfn(vcpu->kvm, gfn);
+}
+
/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
* make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
* emulate 32 bytes dcbz length.
@@ -415,8 +488,10 @@
int i;
hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
- if (is_error_page(hpage))
+ if (is_error_page(hpage)) {
+ kvm_release_page_clean(hpage);
return;
+ }
hpage_offset = pte->raddr & ~PAGE_MASK;
hpage_offset &= ~0xFFFULL;
@@ -437,14 +512,14 @@
static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
struct kvmppc_pte *pte)
{
- int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR));
+ int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR));
int r;
if (relocated) {
r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
} else {
pte->eaddr = eaddr;
- pte->raddr = eaddr & 0xffffffff;
+ pte->raddr = eaddr & KVM_PAM;
pte->vpage = VSID_REAL | eaddr >> 12;
pte->may_read = true;
pte->may_write = true;
@@ -533,6 +608,13 @@
static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
+ ulong mp_pa = vcpu->arch.magic_page_pa;
+
+ if (unlikely(mp_pa) &&
+ unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
+ return 1;
+ }
+
return kvm_is_visible_gfn(vcpu->kvm, gfn);
}
@@ -545,8 +627,8 @@
int page_found = 0;
struct kvmppc_pte pte;
bool is_mmio = false;
- bool dr = (vcpu->arch.msr & MSR_DR) ? true : false;
- bool ir = (vcpu->arch.msr & MSR_IR) ? true : false;
+ bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
+ bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
u64 vsid;
relocated = data ? dr : ir;
@@ -558,12 +640,12 @@
pte.may_execute = true;
pte.may_read = true;
pte.may_write = true;
- pte.raddr = eaddr & 0xffffffff;
+ pte.raddr = eaddr & KVM_PAM;
pte.eaddr = eaddr;
pte.vpage = eaddr >> 12;
}
- switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+ switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
case 0:
pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
break;
@@ -571,7 +653,7 @@
case MSR_IR:
vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
- if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR)
+ if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
else
pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
@@ -594,20 +676,23 @@
if (page_found == -ENOENT) {
/* Page not found in guest PTE entries */
- vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
- to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
- vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
+ vcpu->arch.shared->msr |=
+ (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EPERM) {
/* Storage protection */
- vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
- to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
- to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
- vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ vcpu->arch.shared->dsisr =
+ to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
+ vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
+ vcpu->arch.shared->msr |=
+ (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EINVAL) {
/* Page not found in guest SLB */
- vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
} else if (!is_mmio &&
kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
@@ -695,9 +780,11 @@
ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
if (ret == -ENOENT) {
- vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1);
- vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0);
- vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0);
+ ulong msr = vcpu->arch.shared->msr;
+
+ msr = kvmppc_set_field(msr, 33, 33, 1);
+ msr = kvmppc_set_field(msr, 34, 36, 0);
+ vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
return EMULATE_AGAIN;
}
@@ -736,7 +823,7 @@
if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
return RESUME_GUEST;
- if (!(vcpu->arch.msr & msr)) {
+ if (!(vcpu->arch.shared->msr & msr)) {
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
return RESUME_GUEST;
}
@@ -796,16 +883,8 @@
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
-#ifdef EXIT_DEBUG
- printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n",
- exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
- kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1);
-#elif defined (EXIT_DEBUG_SIMPLE)
- if ((exit_nr != 0x900) && (exit_nr != 0x500))
- printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n",
- exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
- vcpu->arch.msr);
-#endif
+
+ trace_kvm_book3s_exit(exit_nr, vcpu);
kvm_resched(vcpu);
switch (exit_nr) {
case BOOK3S_INTERRUPT_INST_STORAGE:
@@ -836,9 +915,9 @@
kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
r = RESUME_GUEST;
} else {
- vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
+ vcpu->arch.shared->msr |=
+ to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
r = RESUME_GUEST;
}
break;
@@ -861,17 +940,16 @@
if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
} else {
- vcpu->arch.dear = dar;
- to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
+ vcpu->arch.shared->dar = dar;
+ vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL);
r = RESUME_GUEST;
}
break;
}
case BOOK3S_INTERRUPT_DATA_SEGMENT:
if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
- vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
kvmppc_book3s_queue_irqprio(vcpu,
BOOK3S_INTERRUPT_DATA_SEGMENT);
}
@@ -904,7 +982,7 @@
program_interrupt:
flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
- if (vcpu->arch.msr & MSR_PR) {
+ if (vcpu->arch.shared->msr & MSR_PR) {
#ifdef EXIT_DEBUG
printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
#endif
@@ -941,10 +1019,10 @@
break;
}
case BOOK3S_INTERRUPT_SYSCALL:
- // XXX make user settable
if (vcpu->arch.osi_enabled &&
(((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
(((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
+ /* MOL hypercalls */
u64 *gprs = run->osi.gprs;
int i;
@@ -953,8 +1031,13 @@
gprs[i] = kvmppc_get_gpr(vcpu, i);
vcpu->arch.osi_needed = 1;
r = RESUME_HOST_NV;
-
+ } else if (!(vcpu->arch.shared->msr & MSR_PR) &&
+ (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
+ /* KVM PV hypercalls */
+ kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+ r = RESUME_GUEST;
} else {
+ /* Guest syscalls */
vcpu->stat.syscall_exits++;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
@@ -989,9 +1072,9 @@
}
case BOOK3S_INTERRUPT_ALIGNMENT:
if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
- to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu,
+ vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
kvmppc_get_last_inst(vcpu));
- vcpu->arch.dear = kvmppc_alignment_dar(vcpu,
+ vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
kvmppc_get_last_inst(vcpu));
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
}
@@ -1031,9 +1114,7 @@
}
}
-#ifdef EXIT_DEBUG
- printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r);
-#endif
+ trace_kvm_book3s_reenter(r, vcpu);
return r;
}
@@ -1052,14 +1133,14 @@
regs->ctr = kvmppc_get_ctr(vcpu);
regs->lr = kvmppc_get_lr(vcpu);
regs->xer = kvmppc_get_xer(vcpu);
- regs->msr = vcpu->arch.msr;
- regs->srr0 = vcpu->arch.srr0;
- regs->srr1 = vcpu->arch.srr1;
+ regs->msr = vcpu->arch.shared->msr;
+ regs->srr0 = vcpu->arch.shared->srr0;
+ regs->srr1 = vcpu->arch.shared->srr1;
regs->pid = vcpu->arch.pid;
- regs->sprg0 = vcpu->arch.sprg0;
- regs->sprg1 = vcpu->arch.sprg1;
- regs->sprg2 = vcpu->arch.sprg2;
- regs->sprg3 = vcpu->arch.sprg3;
+ regs->sprg0 = vcpu->arch.shared->sprg0;
+ regs->sprg1 = vcpu->arch.shared->sprg1;
+ regs->sprg2 = vcpu->arch.shared->sprg2;
+ regs->sprg3 = vcpu->arch.shared->sprg3;
regs->sprg5 = vcpu->arch.sprg4;
regs->sprg6 = vcpu->arch.sprg5;
regs->sprg7 = vcpu->arch.sprg6;
@@ -1080,12 +1161,12 @@
kvmppc_set_lr(vcpu, regs->lr);
kvmppc_set_xer(vcpu, regs->xer);
kvmppc_set_msr(vcpu, regs->msr);
- vcpu->arch.srr0 = regs->srr0;
- vcpu->arch.srr1 = regs->srr1;
- vcpu->arch.sprg0 = regs->sprg0;
- vcpu->arch.sprg1 = regs->sprg1;
- vcpu->arch.sprg2 = regs->sprg2;
- vcpu->arch.sprg3 = regs->sprg3;
+ vcpu->arch.shared->srr0 = regs->srr0;
+ vcpu->arch.shared->srr1 = regs->srr1;
+ vcpu->arch.shared->sprg0 = regs->sprg0;
+ vcpu->arch.shared->sprg1 = regs->sprg1;
+ vcpu->arch.shared->sprg2 = regs->sprg2;
+ vcpu->arch.shared->sprg3 = regs->sprg3;
vcpu->arch.sprg5 = regs->sprg4;
vcpu->arch.sprg6 = regs->sprg5;
vcpu->arch.sprg7 = regs->sprg6;
@@ -1111,10 +1192,9 @@
sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
}
} else {
- for (i = 0; i < 16; i++) {
- sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
- sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
- }
+ for (i = 0; i < 16; i++)
+ sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];
+
for (i = 0; i < 8; i++) {
sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
@@ -1225,6 +1305,7 @@
struct kvmppc_vcpu_book3s *vcpu_book3s;
struct kvm_vcpu *vcpu;
int err = -ENOMEM;
+ unsigned long p;
vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s));
if (!vcpu_book3s)
@@ -1242,6 +1323,12 @@
if (err)
goto free_shadow_vcpu;
+ p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
+ /* the real shared page fills the last 4k of our page */
+ vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
+ if (!p)
+ goto uninit_vcpu;
+
vcpu->arch.host_retip = kvm_return_point;
vcpu->arch.host_msr = mfmsr();
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1268,10 +1355,12 @@
err = kvmppc_mmu_init(vcpu);
if (err < 0)
- goto free_shadow_vcpu;
+ goto uninit_vcpu;
return vcpu;
+uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
free_shadow_vcpu:
kfree(vcpu_book3s->shadow_vcpu);
free_vcpu:
@@ -1284,6 +1373,7 @@
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
kvm_vcpu_uninit(vcpu);
kfree(vcpu_book3s->shadow_vcpu);
vfree(vcpu_book3s);
@@ -1346,7 +1436,7 @@
local_irq_enable();
/* Preload FPU if it's enabled */
- if (vcpu->arch.msr & MSR_FP)
+ if (vcpu->arch.shared->msr & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
ret = __kvmppc_vcpu_entry(kvm_run, vcpu);
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 3292d76..c8cefdd 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -58,14 +58,39 @@
#endif
}
+static inline u32 sr_vsid(u32 sr_raw)
+{
+ return sr_raw & 0x0fffffff;
+}
+
+static inline bool sr_valid(u32 sr_raw)
+{
+ return (sr_raw & 0x80000000) ? false : true;
+}
+
+static inline bool sr_ks(u32 sr_raw)
+{
+ return (sr_raw & 0x40000000) ? true: false;
+}
+
+static inline bool sr_kp(u32 sr_raw)
+{
+ return (sr_raw & 0x20000000) ? true: false;
+}
+
+static inline bool sr_nx(u32 sr_raw)
+{
+ return (sr_raw & 0x10000000) ? true: false;
+}
+
static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *pte, bool data);
static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
u64 *vsid);
-static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr)
+static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr)
{
- return &vcpu_book3s->sr[(eaddr >> 28) & 0xf];
+ return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf];
}
static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
@@ -87,7 +112,7 @@
}
static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s,
- struct kvmppc_sr *sre, gva_t eaddr,
+ u32 sre, gva_t eaddr,
bool primary)
{
u32 page, hash, pteg, htabmask;
@@ -96,7 +121,7 @@
page = (eaddr & 0x0FFFFFFF) >> 12;
htabmask = ((vcpu_book3s->sdr1 & 0x1FF) << 16) | 0xFFC0;
- hash = ((sre->vsid ^ page) << 6);
+ hash = ((sr_vsid(sre) ^ page) << 6);
if (!primary)
hash = ~hash;
hash &= htabmask;
@@ -104,8 +129,8 @@
pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash;
dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n",
- vcpu_book3s->vcpu.arch.pc, eaddr, vcpu_book3s->sdr1, pteg,
- sre->vsid);
+ kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg,
+ sr_vsid(sre));
r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
if (kvm_is_error_hva(r))
@@ -113,10 +138,9 @@
return r | (pteg & ~PAGE_MASK);
}
-static u32 kvmppc_mmu_book3s_32_get_ptem(struct kvmppc_sr *sre, gva_t eaddr,
- bool primary)
+static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary)
{
- return ((eaddr & 0x0fffffff) >> 22) | (sre->vsid << 7) |
+ return ((eaddr & 0x0fffffff) >> 22) | (sr_vsid(sre) << 7) |
(primary ? 0 : 0x40) | 0x80000000;
}
@@ -133,7 +157,7 @@
else
bat = &vcpu_book3s->ibat[i];
- if (vcpu->arch.msr & MSR_PR) {
+ if (vcpu->arch.shared->msr & MSR_PR) {
if (!bat->vp)
continue;
} else {
@@ -180,17 +204,17 @@
bool primary)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
- struct kvmppc_sr *sre;
+ u32 sre;
hva_t ptegp;
u32 pteg[16];
u32 ptem = 0;
int i;
int found = 0;
- sre = find_sr(vcpu_book3s, eaddr);
+ sre = find_sr(vcpu, eaddr);
dprintk_pte("SR 0x%lx: vsid=0x%x, raw=0x%x\n", eaddr >> 28,
- sre->vsid, sre->raw);
+ sr_vsid(sre), sre);
pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
@@ -214,8 +238,8 @@
pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF);
pp = pteg[i+1] & 3;
- if ((sre->Kp && (vcpu->arch.msr & MSR_PR)) ||
- (sre->Ks && !(vcpu->arch.msr & MSR_PR)))
+ if ((sr_kp(sre) && (vcpu->arch.shared->msr & MSR_PR)) ||
+ (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR)))
pp |= 4;
pte->may_write = false;
@@ -269,7 +293,7 @@
dprintk_pte("KVM MMU: No PTE found (sdr1=0x%llx ptegp=0x%lx)\n",
to_book3s(vcpu)->sdr1, ptegp);
for (i=0; i<16; i+=2) {
- dprintk_pte(" %02d: 0x%x - 0x%x (0x%llx)\n",
+ dprintk_pte(" %02d: 0x%x - 0x%x (0x%x)\n",
i, pteg[i], pteg[i+1], ptem);
}
}
@@ -281,8 +305,24 @@
struct kvmppc_pte *pte, bool data)
{
int r;
+ ulong mp_ea = vcpu->arch.magic_page_ea;
pte->eaddr = eaddr;
+
+ /* Magic page override */
+ if (unlikely(mp_ea) &&
+ unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
+ !(vcpu->arch.shared->msr & MSR_PR)) {
+ pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
+ pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff);
+ pte->raddr &= KVM_PAM;
+ pte->may_execute = true;
+ pte->may_read = true;
+ pte->may_write = true;
+
+ return 0;
+ }
+
r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data);
if (r < 0)
r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true);
@@ -295,30 +335,13 @@
static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)
{
- return to_book3s(vcpu)->sr[srnum].raw;
+ return vcpu->arch.shared->sr[srnum];
}
static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
ulong value)
{
- struct kvmppc_sr *sre;
-
- sre = &to_book3s(vcpu)->sr[srnum];
-
- /* Flush any left-over shadows from the previous SR */
-
- /* XXX Not necessary? */
- /* kvmppc_mmu_pte_flush(vcpu, ((u64)sre->vsid) << 28, 0xf0000000ULL); */
-
- /* And then put in the new SR */
- sre->raw = value;
- sre->vsid = (value & 0x0fffffff);
- sre->valid = (value & 0x80000000) ? false : true;
- sre->Ks = (value & 0x40000000) ? true : false;
- sre->Kp = (value & 0x20000000) ? true : false;
- sre->nx = (value & 0x10000000) ? true : false;
-
- /* Map the new segment */
+ vcpu->arch.shared->sr[srnum] = value;
kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);
}
@@ -331,19 +354,19 @@
u64 *vsid)
{
ulong ea = esid << SID_SHIFT;
- struct kvmppc_sr *sr;
+ u32 sr;
u64 gvsid = esid;
- if (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
- sr = find_sr(to_book3s(vcpu), ea);
- if (sr->valid)
- gvsid = sr->vsid;
+ if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ sr = find_sr(vcpu, ea);
+ if (sr_valid(sr))
+ gvsid = sr_vsid(sr);
}
/* In case we only have one of MSR_IR or MSR_DR set, let's put
that in the real-mode context (and hope RM doesn't access
high memory) */
- switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+ switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
case 0:
*vsid = VSID_REAL | esid;
break;
@@ -354,8 +377,8 @@
*vsid = VSID_REAL_DR | gvsid;
break;
case MSR_DR|MSR_IR:
- if (sr->valid)
- *vsid = sr->vsid;
+ if (sr_valid(sr))
+ *vsid = sr_vsid(sr);
else
*vsid = VSID_BAT | gvsid;
break;
@@ -363,7 +386,7 @@
BUG();
}
- if (vcpu->arch.msr & MSR_PR)
+ if (vcpu->arch.shared->msr & MSR_PR)
*vsid |= VSID_PR;
return 0;
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 0b51ef8..9fecbfb 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -19,7 +19,6 @@
*/
#include <linux/kvm_host.h>
-#include <linux/hash.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -77,7 +76,14 @@
* a hash, so we don't waste cycles on looping */
static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
{
- return hash_64(gvsid, SID_MAP_BITS);
+ return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK));
}
@@ -86,7 +92,7 @@
struct kvmppc_sid_map *map;
u16 sid_map_mask;
- if (vcpu->arch.msr & MSR_PR)
+ if (vcpu->arch.shared->msr & MSR_PR)
gvsid |= VSID_PR;
sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
@@ -147,8 +153,8 @@
struct hpte_cache *pte;
/* Get host physical address for gpa */
- hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
- if (kvm_is_error_hva(hpaddr)) {
+ hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
+ if (is_error_pfn(hpaddr)) {
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
orig_pte->eaddr);
return -EINVAL;
@@ -253,7 +259,7 @@
u16 sid_map_mask;
static int backwards_map = 0;
- if (vcpu->arch.msr & MSR_PR)
+ if (vcpu->arch.shared->msr & MSR_PR)
gvsid |= VSID_PR;
/* We might get collisions that trap in preceding order, so let's
@@ -269,18 +275,15 @@
backwards_map = !backwards_map;
/* Uh-oh ... out of mappings. Let's flush! */
- if (vcpu_book3s->vsid_next >= vcpu_book3s->vsid_max) {
- vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
+ if (vcpu_book3s->vsid_next >= VSID_POOL_SIZE) {
+ vcpu_book3s->vsid_next = 0;
memset(vcpu_book3s->sid_map, 0,
sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
kvmppc_mmu_pte_flush(vcpu, 0, 0);
kvmppc_mmu_flush_segments(vcpu);
}
- map->host_vsid = vcpu_book3s->vsid_next;
-
- /* Would have to be 111 to be completely aligned with the rest of
- Linux, but that is just way too little space! */
- vcpu_book3s->vsid_next+=1;
+ map->host_vsid = vcpu_book3s->vsid_pool[vcpu_book3s->vsid_next];
+ vcpu_book3s->vsid_next++;
map->guest_vsid = gvsid;
map->valid = true;
@@ -327,40 +330,38 @@
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
{
+ int i;
+
kvmppc_mmu_hpte_destroy(vcpu);
preempt_disable();
- __destroy_context(to_book3s(vcpu)->context_id);
+ for (i = 0; i < SID_CONTEXTS; i++)
+ __destroy_context(to_book3s(vcpu)->context_id[i]);
preempt_enable();
}
/* From mm/mmu_context_hash32.c */
-#define CTX_TO_VSID(ctx) (((ctx) * (897 * 16)) & 0xffffff)
+#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff)
int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
int err;
ulong sdr1;
+ int i;
+ int j;
- err = __init_new_context();
- if (err < 0)
- return -1;
- vcpu3s->context_id = err;
+ for (i = 0; i < SID_CONTEXTS; i++) {
+ err = __init_new_context();
+ if (err < 0)
+ goto init_fail;
+ vcpu3s->context_id[i] = err;
- vcpu3s->vsid_max = CTX_TO_VSID(vcpu3s->context_id + 1) - 1;
- vcpu3s->vsid_first = CTX_TO_VSID(vcpu3s->context_id);
+ /* Remember context id for this combination */
+ for (j = 0; j < 16; j++)
+ vcpu3s->vsid_pool[(i * 16) + j] = CTX_TO_VSID(err, j);
+ }
-#if 0 /* XXX still doesn't guarantee uniqueness */
- /* We could collide with the Linux vsid space because the vsid
- * wraps around at 24 bits. We're safe if we do our own space
- * though, so let's always set the highest bit. */
-
- vcpu3s->vsid_max |= 0x00800000;
- vcpu3s->vsid_first |= 0x00800000;
-#endif
- BUG_ON(vcpu3s->vsid_max < vcpu3s->vsid_first);
-
- vcpu3s->vsid_next = vcpu3s->vsid_first;
+ vcpu3s->vsid_next = 0;
/* Remember where the HTAB is */
asm ( "mfsdr1 %0" : "=r"(sdr1) );
@@ -370,4 +371,14 @@
kvmppc_mmu_hpte_init(vcpu);
return 0;
+
+init_fail:
+ for (j = 0; j < i; j++) {
+ if (!vcpu3s->context_id[j])
+ continue;
+
+ __destroy_context(to_book3s(vcpu)->context_id[j]);
+ }
+
+ return -1;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 4025ea2..d7889ef 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -163,6 +163,22 @@
bool found = false;
bool perm_err = false;
int second = 0;
+ ulong mp_ea = vcpu->arch.magic_page_ea;
+
+ /* Magic page override */
+ if (unlikely(mp_ea) &&
+ unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
+ !(vcpu->arch.shared->msr & MSR_PR)) {
+ gpte->eaddr = eaddr;
+ gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
+ gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff);
+ gpte->raddr &= KVM_PAM;
+ gpte->may_execute = true;
+ gpte->may_read = true;
+ gpte->may_write = true;
+
+ return 0;
+ }
slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, eaddr);
if (!slbe)
@@ -180,9 +196,9 @@
goto no_page_found;
}
- if ((vcpu->arch.msr & MSR_PR) && slbe->Kp)
+ if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp)
key = 4;
- else if (!(vcpu->arch.msr & MSR_PR) && slbe->Ks)
+ else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks)
key = 4;
for (i=0; i<16; i+=2) {
@@ -381,7 +397,7 @@
for (i = 1; i < vcpu_book3s->slb_nr; i++)
vcpu_book3s->slb[i].valid = false;
- if (vcpu->arch.msr & MSR_IR) {
+ if (vcpu->arch.shared->msr & MSR_IR) {
kvmppc_mmu_flush_segments(vcpu);
kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
}
@@ -445,14 +461,15 @@
ulong ea = esid << SID_SHIFT;
struct kvmppc_slb *slb;
u64 gvsid = esid;
+ ulong mp_ea = vcpu->arch.magic_page_ea;
- if (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+ if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea);
if (slb)
gvsid = slb->vsid;
}
- switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+ switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
case 0:
*vsid = VSID_REAL | esid;
break;
@@ -464,7 +481,7 @@
break;
case MSR_DR|MSR_IR:
if (!slb)
- return -ENOENT;
+ goto no_slb;
*vsid = gvsid;
break;
@@ -473,10 +490,21 @@
break;
}
- if (vcpu->arch.msr & MSR_PR)
+ if (vcpu->arch.shared->msr & MSR_PR)
*vsid |= VSID_PR;
return 0;
+
+no_slb:
+ /* Catch magic page case */
+ if (unlikely(mp_ea) &&
+ unlikely(esid == (mp_ea >> SID_SHIFT)) &&
+ !(vcpu->arch.shared->msr & MSR_PR)) {
+ *vsid = VSID_REAL | esid;
+ return 0;
+ }
+
+ return -EINVAL;
}
static bool kvmppc_mmu_book3s_64_is_dcbz32(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 384179a..fa2f084 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -20,7 +20,6 @@
*/
#include <linux/kvm_host.h>
-#include <linux/hash.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -28,24 +27,9 @@
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
+#include "trace.h"
#define PTE_SIZE 12
-#define VSID_ALL 0
-
-/* #define DEBUG_MMU */
-/* #define DEBUG_SLB */
-
-#ifdef DEBUG_MMU
-#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
-#else
-#define dprintk_mmu(a, ...) do { } while(0)
-#endif
-
-#ifdef DEBUG_SLB
-#define dprintk_slb(a, ...) printk(KERN_INFO a, __VA_ARGS__)
-#else
-#define dprintk_slb(a, ...) do { } while(0)
-#endif
void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
@@ -58,34 +42,39 @@
* a hash, so we don't waste cycles on looping */
static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
{
- return hash_64(gvsid, SID_MAP_BITS);
+ return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^
+ ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK));
}
+
static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
{
struct kvmppc_sid_map *map;
u16 sid_map_mask;
- if (vcpu->arch.msr & MSR_PR)
+ if (vcpu->arch.shared->msr & MSR_PR)
gvsid |= VSID_PR;
sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
map = &to_book3s(vcpu)->sid_map[sid_map_mask];
- if (map->guest_vsid == gvsid) {
- dprintk_slb("SLB: Searching: 0x%llx -> 0x%llx\n",
- gvsid, map->host_vsid);
+ if (map->valid && (map->guest_vsid == gvsid)) {
+ trace_kvm_book3s_slb_found(gvsid, map->host_vsid);
return map;
}
map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask];
- if (map->guest_vsid == gvsid) {
- dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n",
- gvsid, map->host_vsid);
+ if (map->valid && (map->guest_vsid == gvsid)) {
+ trace_kvm_book3s_slb_found(gvsid, map->host_vsid);
return map;
}
- dprintk_slb("SLB: Searching %d/%d: 0x%llx -> not found\n",
- sid_map_mask, SID_MAP_MASK - sid_map_mask, gvsid);
+ trace_kvm_book3s_slb_fail(sid_map_mask, gvsid);
return NULL;
}
@@ -101,18 +90,13 @@
struct kvmppc_sid_map *map;
/* Get host physical address for gpa */
- hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
- if (kvm_is_error_hva(hpaddr)) {
+ hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
+ if (is_error_pfn(hpaddr)) {
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
return -EINVAL;
}
hpaddr <<= PAGE_SHIFT;
-#if PAGE_SHIFT == 12
-#elif PAGE_SHIFT == 16
- hpaddr |= orig_pte->raddr & 0xf000;
-#else
-#error Unknown page size
-#endif
+ hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
/* and write the mapping ea -> hpa into the pt */
vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
@@ -161,10 +145,7 @@
} else {
struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
- dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n",
- ((rflags & HPTE_R_PP) == 3) ? '-' : 'w',
- (rflags & HPTE_R_N) ? '-' : 'x',
- orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr);
+ trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte);
/* The ppc_md code may give us a secondary entry even though we
asked for a primary. Fix up. */
@@ -191,7 +172,7 @@
u16 sid_map_mask;
static int backwards_map = 0;
- if (vcpu->arch.msr & MSR_PR)
+ if (vcpu->arch.shared->msr & MSR_PR)
gvsid |= VSID_PR;
/* We might get collisions that trap in preceding order, so let's
@@ -219,8 +200,7 @@
map->guest_vsid = gvsid;
map->valid = true;
- dprintk_slb("SLB: New mapping at %d: 0x%llx -> 0x%llx\n",
- sid_map_mask, gvsid, map->host_vsid);
+ trace_kvm_book3s_slb_map(sid_map_mask, gvsid, map->host_vsid);
return map;
}
@@ -292,7 +272,7 @@
to_svcpu(vcpu)->slb[slb_index].esid = slb_esid;
to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid;
- dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid);
+ trace_kvm_book3s_slbmte(slb_vsid, slb_esid);
return 0;
}
@@ -306,7 +286,7 @@
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
{
kvmppc_mmu_hpte_destroy(vcpu);
- __destroy_context(to_book3s(vcpu)->context_id);
+ __destroy_context(to_book3s(vcpu)->context_id[0]);
}
int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
@@ -317,10 +297,10 @@
err = __init_new_context();
if (err < 0)
return -1;
- vcpu3s->context_id = err;
+ vcpu3s->context_id[0] = err;
- vcpu3s->vsid_max = ((vcpu3s->context_id + 1) << USER_ESID_BITS) - 1;
- vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS;
+ vcpu3s->vsid_max = ((vcpu3s->context_id[0] + 1) << USER_ESID_BITS) - 1;
+ vcpu3s->vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS;
vcpu3s->vsid_next = vcpu3s->vsid_first;
kvmppc_mmu_hpte_init(vcpu);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index c85f906..4668465 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -73,8 +73,8 @@
switch (get_xop(inst)) {
case OP_19_XOP_RFID:
case OP_19_XOP_RFI:
- kvmppc_set_pc(vcpu, vcpu->arch.srr0);
- kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+ kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0);
+ kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
*advance = 0;
break;
@@ -86,14 +86,15 @@
case 31:
switch (get_xop(inst)) {
case OP_31_XOP_MFMSR:
- kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr);
+ kvmppc_set_gpr(vcpu, get_rt(inst),
+ vcpu->arch.shared->msr);
break;
case OP_31_XOP_MTMSRD:
{
ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
if (inst & 0x10000) {
- vcpu->arch.msr &= ~(MSR_RI | MSR_EE);
- vcpu->arch.msr |= rs & (MSR_RI | MSR_EE);
+ vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE);
+ vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE);
} else
kvmppc_set_msr(vcpu, rs);
break;
@@ -204,14 +205,14 @@
ra = kvmppc_get_gpr(vcpu, get_ra(inst));
addr = (ra + rb) & ~31ULL;
- if (!(vcpu->arch.msr & MSR_SF))
+ if (!(vcpu->arch.shared->msr & MSR_SF))
addr &= 0xffffffff;
vaddr = addr;
r = kvmppc_st(vcpu, &addr, 32, zeros, true);
if ((r == -ENOENT) || (r == -EPERM)) {
*advance = 0;
- vcpu->arch.dear = vaddr;
+ vcpu->arch.shared->dar = vaddr;
to_svcpu(vcpu)->fault_dar = vaddr;
dsisr = DSISR_ISSTORE;
@@ -220,7 +221,7 @@
else if (r == -EPERM)
dsisr |= DSISR_PROTFAULT;
- to_book3s(vcpu)->dsisr = dsisr;
+ vcpu->arch.shared->dsisr = dsisr;
to_svcpu(vcpu)->fault_dsisr = dsisr;
kvmppc_book3s_queue_irqprio(vcpu,
@@ -263,7 +264,7 @@
}
}
-static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn)
+static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
struct kvmppc_bat *bat;
@@ -285,35 +286,7 @@
BUG();
}
- if (sprn % 2)
- return bat->raw >> 32;
- else
- return bat->raw;
-}
-
-static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val)
-{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
- struct kvmppc_bat *bat;
-
- switch (sprn) {
- case SPRN_IBAT0U ... SPRN_IBAT3L:
- bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2];
- break;
- case SPRN_IBAT4U ... SPRN_IBAT7L:
- bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)];
- break;
- case SPRN_DBAT0U ... SPRN_DBAT3L:
- bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2];
- break;
- case SPRN_DBAT4U ... SPRN_DBAT7L:
- bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)];
- break;
- default:
- BUG();
- }
-
- kvmppc_set_bat(vcpu, bat, !(sprn % 2), val);
+ return bat;
}
int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
@@ -326,10 +299,10 @@
to_book3s(vcpu)->sdr1 = spr_val;
break;
case SPRN_DSISR:
- to_book3s(vcpu)->dsisr = spr_val;
+ vcpu->arch.shared->dsisr = spr_val;
break;
case SPRN_DAR:
- vcpu->arch.dear = spr_val;
+ vcpu->arch.shared->dar = spr_val;
break;
case SPRN_HIOR:
to_book3s(vcpu)->hior = spr_val;
@@ -338,12 +311,16 @@
case SPRN_IBAT4U ... SPRN_IBAT7L:
case SPRN_DBAT0U ... SPRN_DBAT3L:
case SPRN_DBAT4U ... SPRN_DBAT7L:
- kvmppc_write_bat(vcpu, sprn, (u32)spr_val);
+ {
+ struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
+
+ kvmppc_set_bat(vcpu, bat, !(sprn % 2), (u32)spr_val);
/* BAT writes happen so rarely that we're ok to flush
* everything here */
kvmppc_mmu_pte_flush(vcpu, 0, 0);
kvmppc_mmu_flush_segments(vcpu);
break;
+ }
case SPRN_HID0:
to_book3s(vcpu)->hid[0] = spr_val;
break;
@@ -433,16 +410,24 @@
case SPRN_IBAT4U ... SPRN_IBAT7L:
case SPRN_DBAT0U ... SPRN_DBAT3L:
case SPRN_DBAT4U ... SPRN_DBAT7L:
- kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn));
+ {
+ struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
+
+ if (sprn % 2)
+ kvmppc_set_gpr(vcpu, rt, bat->raw >> 32);
+ else
+ kvmppc_set_gpr(vcpu, rt, bat->raw);
+
break;
+ }
case SPRN_SDR1:
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
break;
case SPRN_DSISR:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr);
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr);
break;
case SPRN_DAR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear);
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar);
break;
case SPRN_HIOR:
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 4868d4a..79751d8 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -21,6 +21,7 @@
#include <linux/kvm_host.h>
#include <linux/hash.h>
#include <linux/slab.h>
+#include "trace.h"
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -30,14 +31,6 @@
#define PTE_SIZE 12
-/* #define DEBUG_MMU */
-
-#ifdef DEBUG_MMU
-#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
-#else
-#define dprintk_mmu(a, ...) do { } while(0)
-#endif
-
static struct kmem_cache *hpte_cache;
static inline u64 kvmppc_mmu_hash_pte(u64 eaddr)
@@ -45,6 +38,12 @@
return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS_PTE);
}
+static inline u64 kvmppc_mmu_hash_pte_long(u64 eaddr)
+{
+ return hash_64((eaddr & 0x0ffff000) >> PTE_SIZE,
+ HPTEG_HASH_BITS_PTE_LONG);
+}
+
static inline u64 kvmppc_mmu_hash_vpte(u64 vpage)
{
return hash_64(vpage & 0xfffffffffULL, HPTEG_HASH_BITS_VPTE);
@@ -60,77 +59,128 @@
{
u64 index;
+ trace_kvm_book3s_mmu_map(pte);
+
+ spin_lock(&vcpu->arch.mmu_lock);
+
/* Add to ePTE list */
index = kvmppc_mmu_hash_pte(pte->pte.eaddr);
- hlist_add_head(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]);
+ hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]);
+
+ /* Add to ePTE_long list */
+ index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr);
+ hlist_add_head_rcu(&pte->list_pte_long,
+ &vcpu->arch.hpte_hash_pte_long[index]);
/* Add to vPTE list */
index = kvmppc_mmu_hash_vpte(pte->pte.vpage);
- hlist_add_head(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]);
+ hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]);
/* Add to vPTE_long list */
index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage);
- hlist_add_head(&pte->list_vpte_long,
- &vcpu->arch.hpte_hash_vpte_long[index]);
+ hlist_add_head_rcu(&pte->list_vpte_long,
+ &vcpu->arch.hpte_hash_vpte_long[index]);
+
+ spin_unlock(&vcpu->arch.mmu_lock);
+}
+
+static void free_pte_rcu(struct rcu_head *head)
+{
+ struct hpte_cache *pte = container_of(head, struct hpte_cache, rcu_head);
+ kmem_cache_free(hpte_cache, pte);
}
static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
- dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n",
- pte->pte.eaddr, pte->pte.vpage, pte->host_va);
+ trace_kvm_book3s_mmu_invalidate(pte);
/* Different for 32 and 64 bit */
kvmppc_mmu_invalidate_pte(vcpu, pte);
+ spin_lock(&vcpu->arch.mmu_lock);
+
+ /* pte already invalidated in between? */
+ if (hlist_unhashed(&pte->list_pte)) {
+ spin_unlock(&vcpu->arch.mmu_lock);
+ return;
+ }
+
+ hlist_del_init_rcu(&pte->list_pte);
+ hlist_del_init_rcu(&pte->list_pte_long);
+ hlist_del_init_rcu(&pte->list_vpte);
+ hlist_del_init_rcu(&pte->list_vpte_long);
+
if (pte->pte.may_write)
kvm_release_pfn_dirty(pte->pfn);
else
kvm_release_pfn_clean(pte->pfn);
- hlist_del(&pte->list_pte);
- hlist_del(&pte->list_vpte);
- hlist_del(&pte->list_vpte_long);
+ spin_unlock(&vcpu->arch.mmu_lock);
vcpu->arch.hpte_cache_count--;
- kmem_cache_free(hpte_cache, pte);
+ call_rcu(&pte->rcu_head, free_pte_rcu);
}
static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
{
struct hpte_cache *pte;
- struct hlist_node *node, *tmp;
+ struct hlist_node *node;
int i;
+ rcu_read_lock();
+
for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
- hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
+ hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
invalidate_pte(vcpu, pte);
}
+
+ rcu_read_unlock();
}
static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
{
struct hlist_head *list;
- struct hlist_node *node, *tmp;
+ struct hlist_node *node;
struct hpte_cache *pte;
/* Find the list of entries in the map */
list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)];
+ rcu_read_lock();
+
/* Check the list for matching entries and invalidate */
- hlist_for_each_entry_safe(pte, node, tmp, list, list_pte)
+ hlist_for_each_entry_rcu(pte, node, list, list_pte)
if ((pte->pte.eaddr & ~0xfffUL) == guest_ea)
invalidate_pte(vcpu, pte);
+
+ rcu_read_unlock();
+}
+
+static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea)
+{
+ struct hlist_head *list;
+ struct hlist_node *node;
+ struct hpte_cache *pte;
+
+ /* Find the list of entries in the map */
+ list = &vcpu->arch.hpte_hash_pte_long[
+ kvmppc_mmu_hash_pte_long(guest_ea)];
+
+ rcu_read_lock();
+
+ /* Check the list for matching entries and invalidate */
+ hlist_for_each_entry_rcu(pte, node, list, list_pte_long)
+ if ((pte->pte.eaddr & 0x0ffff000UL) == guest_ea)
+ invalidate_pte(vcpu, pte);
+
+ rcu_read_unlock();
}
void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
{
- u64 i;
-
- dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n",
- vcpu->arch.hpte_cache_count, guest_ea, ea_mask);
-
+ trace_kvm_book3s_mmu_flush("", vcpu, guest_ea, ea_mask);
guest_ea &= ea_mask;
switch (ea_mask) {
@@ -138,9 +188,7 @@
kvmppc_mmu_pte_flush_page(vcpu, guest_ea);
break;
case 0x0ffff000:
- /* 32-bit flush w/o segment, go through all possible segments */
- for (i = 0; i < 0x100000000ULL; i += 0x10000000ULL)
- kvmppc_mmu_pte_flush(vcpu, guest_ea | i, ~0xfffUL);
+ kvmppc_mmu_pte_flush_long(vcpu, guest_ea);
break;
case 0:
/* Doing a complete flush -> start from scratch */
@@ -156,39 +204,46 @@
static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
{
struct hlist_head *list;
- struct hlist_node *node, *tmp;
+ struct hlist_node *node;
struct hpte_cache *pte;
u64 vp_mask = 0xfffffffffULL;
list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)];
+ rcu_read_lock();
+
/* Check the list for matching entries and invalidate */
- hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte)
+ hlist_for_each_entry_rcu(pte, node, list, list_vpte)
if ((pte->pte.vpage & vp_mask) == guest_vp)
invalidate_pte(vcpu, pte);
+
+ rcu_read_unlock();
}
/* Flush with mask 0xffffff000 */
static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
{
struct hlist_head *list;
- struct hlist_node *node, *tmp;
+ struct hlist_node *node;
struct hpte_cache *pte;
u64 vp_mask = 0xffffff000ULL;
list = &vcpu->arch.hpte_hash_vpte_long[
kvmppc_mmu_hash_vpte_long(guest_vp)];
+ rcu_read_lock();
+
/* Check the list for matching entries and invalidate */
- hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
+ hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
if ((pte->pte.vpage & vp_mask) == guest_vp)
invalidate_pte(vcpu, pte);
+
+ rcu_read_unlock();
}
void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
{
- dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n",
- vcpu->arch.hpte_cache_count, guest_vp, vp_mask);
+ trace_kvm_book3s_mmu_flush("v", vcpu, guest_vp, vp_mask);
guest_vp &= vp_mask;
switch(vp_mask) {
@@ -206,21 +261,24 @@
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
- struct hlist_node *node, *tmp;
+ struct hlist_node *node;
struct hpte_cache *pte;
int i;
- dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx - 0x%lx\n",
- vcpu->arch.hpte_cache_count, pa_start, pa_end);
+ trace_kvm_book3s_mmu_flush("p", vcpu, pa_start, pa_end);
+
+ rcu_read_lock();
for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
- hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
+ hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
if ((pte->pte.raddr >= pa_start) &&
(pte->pte.raddr < pa_end))
invalidate_pte(vcpu, pte);
}
+
+ rcu_read_unlock();
}
struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
@@ -254,11 +312,15 @@
/* init hpte lookup hashes */
kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte,
ARRAY_SIZE(vcpu->arch.hpte_hash_pte));
+ kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte_long,
+ ARRAY_SIZE(vcpu->arch.hpte_hash_pte_long));
kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte,
ARRAY_SIZE(vcpu->arch.hpte_hash_vpte));
kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long,
ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long));
+ spin_lock_init(&vcpu->arch.mmu_lock);
+
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index 35a701f..7b0ee96c 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -165,14 +165,15 @@
static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
{
u64 dsisr;
+ struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared;
- vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 36, 0);
- vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0);
- vcpu->arch.dear = eaddr;
+ shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0);
+ shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0);
+ shared->dar = eaddr;
/* Page Fault */
dsisr = kvmppc_set_field(0, 33, 33, 1);
if (is_store)
- to_book3s(vcpu)->dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
+ shared->dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
}
@@ -658,7 +659,7 @@
if (!kvmppc_inst_is_paired_single(vcpu, inst))
return EMULATE_FAIL;
- if (!(vcpu->arch.msr & MSR_FP)) {
+ if (!(vcpu->arch.shared->msr & MSR_FP)) {
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL);
return EMULATE_AGAIN;
}
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 506d5c3..2b9c908 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -202,8 +202,25 @@
#if defined(CONFIG_PPC_BOOK3S_32)
#define STACK_LR INT_FRAME_SIZE+4
+
+/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */
+#define MSR_EXT_START \
+ PPC_STL r20, _NIP(r1); \
+ mfmsr r20; \
+ LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \
+ andc r3,r20,r3; /* Disable DR,EE */ \
+ mtmsr r3; \
+ sync
+
+#define MSR_EXT_END \
+ mtmsr r20; /* Enable DR,EE */ \
+ sync; \
+ PPC_LL r20, _NIP(r1)
+
#elif defined(CONFIG_PPC_BOOK3S_64)
#define STACK_LR _LINK
+#define MSR_EXT_START
+#define MSR_EXT_END
#endif
/*
@@ -215,19 +232,12 @@
PPC_STLU r1, -INT_FRAME_SIZE(r1); \
mflr r3; \
PPC_STL r3, STACK_LR(r1); \
- PPC_STL r20, _NIP(r1); \
- mfmsr r20; \
- LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \
- andc r3,r20,r3; /* Disable DR,EE */ \
- mtmsr r3; \
- sync; \
+ MSR_EXT_START; \
\
bl FUNC(load_up_ ## what); \
\
- mtmsr r20; /* Enable DR,EE */ \
- sync; \
+ MSR_EXT_END; \
PPC_LL r3, STACK_LR(r1); \
- PPC_LL r20, _NIP(r1); \
mtlr r3; \
addi r1, r1, INT_FRAME_SIZE; \
blr
@@ -242,10 +252,10 @@
.global kvmppc_trampoline_lowmem
kvmppc_trampoline_lowmem:
- .long kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START
+ PPC_LONG kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START
.global kvmppc_trampoline_enter
kvmppc_trampoline_enter:
- .long kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START
+ PPC_LONG kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START
#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 8d4e35f..77575d0 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -62,9 +62,10 @@
{
int i;
- printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
+ printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr);
printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
- printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
+ printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0,
+ vcpu->arch.shared->srr1);
printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
@@ -130,13 +131,19 @@
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
+ unsigned int prio = BOOKE_IRQPRIO_EXTERNAL;
+
+ if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
+ prio = BOOKE_IRQPRIO_EXTERNAL_LEVEL;
+
+ kvmppc_booke_queue_irqprio(vcpu, prio);
}
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
+ clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
}
/* Deliver the interrupt of the corresponding priority, if possible. */
@@ -146,6 +153,26 @@
int allowed = 0;
ulong uninitialized_var(msr_mask);
bool update_esr = false, update_dear = false;
+ ulong crit_raw = vcpu->arch.shared->critical;
+ ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
+ bool crit;
+ bool keep_irq = false;
+
+ /* Truncate crit indicators in 32 bit mode */
+ if (!(vcpu->arch.shared->msr & MSR_SF)) {
+ crit_raw &= 0xffffffff;
+ crit_r1 &= 0xffffffff;
+ }
+
+ /* Critical section when crit == r1 */
+ crit = (crit_raw == crit_r1);
+ /* ... and we're in supervisor mode */
+ crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
+
+ if (priority == BOOKE_IRQPRIO_EXTERNAL_LEVEL) {
+ priority = BOOKE_IRQPRIO_EXTERNAL;
+ keep_irq = true;
+ }
switch (priority) {
case BOOKE_IRQPRIO_DTLB_MISS:
@@ -169,36 +196,38 @@
break;
case BOOKE_IRQPRIO_CRITICAL:
case BOOKE_IRQPRIO_WATCHDOG:
- allowed = vcpu->arch.msr & MSR_CE;
+ allowed = vcpu->arch.shared->msr & MSR_CE;
msr_mask = MSR_ME;
break;
case BOOKE_IRQPRIO_MACHINE_CHECK:
- allowed = vcpu->arch.msr & MSR_ME;
+ allowed = vcpu->arch.shared->msr & MSR_ME;
msr_mask = 0;
break;
case BOOKE_IRQPRIO_EXTERNAL:
case BOOKE_IRQPRIO_DECREMENTER:
case BOOKE_IRQPRIO_FIT:
- allowed = vcpu->arch.msr & MSR_EE;
+ allowed = vcpu->arch.shared->msr & MSR_EE;
+ allowed = allowed && !crit;
msr_mask = MSR_CE|MSR_ME|MSR_DE;
break;
case BOOKE_IRQPRIO_DEBUG:
- allowed = vcpu->arch.msr & MSR_DE;
+ allowed = vcpu->arch.shared->msr & MSR_DE;
msr_mask = MSR_ME;
break;
}
if (allowed) {
- vcpu->arch.srr0 = vcpu->arch.pc;
- vcpu->arch.srr1 = vcpu->arch.msr;
+ vcpu->arch.shared->srr0 = vcpu->arch.pc;
+ vcpu->arch.shared->srr1 = vcpu->arch.shared->msr;
vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
if (update_esr == true)
vcpu->arch.esr = vcpu->arch.queued_esr;
if (update_dear == true)
- vcpu->arch.dear = vcpu->arch.queued_dear;
- kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
+ vcpu->arch.shared->dar = vcpu->arch.queued_dear;
+ kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
- clear_bit(priority, &vcpu->arch.pending_exceptions);
+ if (!keep_irq)
+ clear_bit(priority, &vcpu->arch.pending_exceptions);
}
return allowed;
@@ -208,6 +237,7 @@
void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
+ unsigned long old_pending = vcpu->arch.pending_exceptions;
unsigned int priority;
priority = __ffs(*pending);
@@ -219,6 +249,12 @@
BITS_PER_BYTE * sizeof(*pending),
priority + 1);
}
+
+ /* Tell the guest about our interrupt status */
+ if (*pending)
+ vcpu->arch.shared->int_pending = 1;
+ else if (old_pending)
+ vcpu->arch.shared->int_pending = 0;
}
/**
@@ -265,7 +301,7 @@
break;
case BOOKE_INTERRUPT_PROGRAM:
- if (vcpu->arch.msr & MSR_PR) {
+ if (vcpu->arch.shared->msr & MSR_PR) {
/* Program traps generated by user-level software must be handled
* by the guest kernel. */
kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
@@ -337,7 +373,15 @@
break;
case BOOKE_INTERRUPT_SYSCALL:
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
+ if (!(vcpu->arch.shared->msr & MSR_PR) &&
+ (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
+ /* KVM PV hypercalls */
+ kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+ r = RESUME_GUEST;
+ } else {
+ /* Guest syscalls */
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
+ }
kvmppc_account_exit(vcpu, SYSCALL_EXITS);
r = RESUME_GUEST;
break;
@@ -466,15 +510,19 @@
/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ int i;
+
vcpu->arch.pc = 0;
- vcpu->arch.msr = 0;
+ vcpu->arch.shared->msr = 0;
kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
vcpu->arch.shadow_pid = 1;
- /* Eye-catching number so we know if the guest takes an interrupt
- * before it's programmed its own IVPR. */
+ /* Eye-catching numbers so we know if the guest takes an interrupt
+ * before it's programmed its own IVPR/IVORs. */
vcpu->arch.ivpr = 0x55550000;
+ for (i = 0; i < BOOKE_IRQPRIO_MAX; i++)
+ vcpu->arch.ivor[i] = 0x7700 | i * 4;
kvmppc_init_timing_stats(vcpu);
@@ -490,14 +538,14 @@
regs->ctr = vcpu->arch.ctr;
regs->lr = vcpu->arch.lr;
regs->xer = kvmppc_get_xer(vcpu);
- regs->msr = vcpu->arch.msr;
- regs->srr0 = vcpu->arch.srr0;
- regs->srr1 = vcpu->arch.srr1;
+ regs->msr = vcpu->arch.shared->msr;
+ regs->srr0 = vcpu->arch.shared->srr0;
+ regs->srr1 = vcpu->arch.shared->srr1;
regs->pid = vcpu->arch.pid;
- regs->sprg0 = vcpu->arch.sprg0;
- regs->sprg1 = vcpu->arch.sprg1;
- regs->sprg2 = vcpu->arch.sprg2;
- regs->sprg3 = vcpu->arch.sprg3;
+ regs->sprg0 = vcpu->arch.shared->sprg0;
+ regs->sprg1 = vcpu->arch.shared->sprg1;
+ regs->sprg2 = vcpu->arch.shared->sprg2;
+ regs->sprg3 = vcpu->arch.shared->sprg3;
regs->sprg5 = vcpu->arch.sprg4;
regs->sprg6 = vcpu->arch.sprg5;
regs->sprg7 = vcpu->arch.sprg6;
@@ -518,12 +566,12 @@
vcpu->arch.lr = regs->lr;
kvmppc_set_xer(vcpu, regs->xer);
kvmppc_set_msr(vcpu, regs->msr);
- vcpu->arch.srr0 = regs->srr0;
- vcpu->arch.srr1 = regs->srr1;
- vcpu->arch.sprg0 = regs->sprg0;
- vcpu->arch.sprg1 = regs->sprg1;
- vcpu->arch.sprg2 = regs->sprg2;
- vcpu->arch.sprg3 = regs->sprg3;
+ vcpu->arch.shared->srr0 = regs->srr0;
+ vcpu->arch.shared->srr1 = regs->srr1;
+ vcpu->arch.shared->sprg0 = regs->sprg0;
+ vcpu->arch.shared->sprg1 = regs->sprg1;
+ vcpu->arch.shared->sprg2 = regs->sprg2;
+ vcpu->arch.shared->sprg3 = regs->sprg3;
vcpu->arch.sprg5 = regs->sprg4;
vcpu->arch.sprg6 = regs->sprg5;
vcpu->arch.sprg7 = regs->sprg6;
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index d59bcca..492bb70 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -46,7 +46,9 @@
#define BOOKE_IRQPRIO_FIT 17
#define BOOKE_IRQPRIO_DECREMENTER 18
#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
-#define BOOKE_IRQPRIO_MAX 19
+/* Internal pseudo-irqprio for level triggered externals */
+#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20
+#define BOOKE_IRQPRIO_MAX 20
extern unsigned long kvmppc_booke_handlers;
@@ -54,12 +56,12 @@
* changing. */
static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
{
- if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
+ if ((new_msr & MSR_PR) != (vcpu->arch.shared->msr & MSR_PR))
kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
- vcpu->arch.msr = new_msr;
+ vcpu->arch.shared->msr = new_msr;
- if (vcpu->arch.msr & MSR_WE) {
+ if (vcpu->arch.shared->msr & MSR_WE) {
kvm_vcpu_block(vcpu);
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
};
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index cbc790e..1260f5f2 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -31,8 +31,8 @@
static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
{
- vcpu->arch.pc = vcpu->arch.srr0;
- kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+ vcpu->arch.pc = vcpu->arch.shared->srr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
}
int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -62,7 +62,7 @@
case OP_31_XOP_MFMSR:
rt = get_rt(inst);
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr);
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
break;
@@ -74,13 +74,13 @@
case OP_31_XOP_WRTEE:
rs = get_rs(inst);
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
| (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
break;
case OP_31_XOP_WRTEEI:
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
| (inst & MSR_EE);
kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
break;
@@ -105,7 +105,7 @@
switch (sprn) {
case SPRN_DEAR:
- vcpu->arch.dear = spr_val; break;
+ vcpu->arch.shared->dar = spr_val; break;
case SPRN_ESR:
vcpu->arch.esr = spr_val; break;
case SPRN_DBCR0:
@@ -200,7 +200,7 @@
case SPRN_IVPR:
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
case SPRN_DEAR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break;
case SPRN_ESR:
kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
case SPRN_DBCR0:
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 380a78c..0498469 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -415,7 +415,8 @@
lwz r8, VCPU_GPR(r8)(r4)
lwz r3, VCPU_PC(r4)
mtsrr0 r3
- lwz r3, VCPU_MSR(r4)
+ lwz r3, VCPU_SHARED(r4)
+ lwz r3, VCPU_SHARED_MSR(r3)
oris r3, r3, KVMPPC_MSR_MASK@h
ori r3, r3, KVMPPC_MSR_MASK@l
mtsrr1 r3
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index e8a00b0..71750f2 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -117,8 +117,14 @@
if (err)
goto uninit_vcpu;
+ vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+ if (!vcpu->arch.shared)
+ goto uninit_tlb;
+
return vcpu;
+uninit_tlb:
+ kvmppc_e500_tlb_uninit(vcpu_e500);
uninit_vcpu:
kvm_vcpu_uninit(vcpu);
free_vcpu:
@@ -131,6 +137,7 @@
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ free_page((unsigned long)vcpu->arch.shared);
kvmppc_e500_tlb_uninit(vcpu_e500);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 21011e1..d6d6d47 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -226,8 +226,7 @@
kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
stlbe->mas1 = 0;
- trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
- stlbe->mas3, stlbe->mas7);
+ trace_kvm_stlb_inval(index_of(tlbsel, esel));
}
static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -298,7 +297,8 @@
/* Get reference to new page. */
new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
if (is_error_page(new_page)) {
- printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
+ printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n",
+ (long)gfn);
kvm_release_page_clean(new_page);
return;
}
@@ -314,10 +314,10 @@
| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
stlbe->mas2 = (gvaddr & MAS2_EPN)
| e500_shadow_mas2_attrib(gtlbe->mas2,
- vcpu_e500->vcpu.arch.msr & MSR_PR);
+ vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
stlbe->mas3 = (hpaddr & MAS3_RPN)
| e500_shadow_mas3_attrib(gtlbe->mas3,
- vcpu_e500->vcpu.arch.msr & MSR_PR);
+ vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
@@ -576,28 +576,28 @@
int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
- unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
}
int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
- unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
}
void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
{
- unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
}
void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
{
- unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as);
}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index d28e301..458946b 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -171,7 +171,7 @@
/* Does it match current guest AS? */
/* XXX what about IS != DS? */
- if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+ if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
return 0;
gpa = get_tlb_raddr(tlbe);
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index b83ba58..c64fd29 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -242,9 +242,11 @@
switch (sprn) {
case SPRN_SRR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0);
+ break;
case SPRN_SRR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1);
+ break;
case SPRN_PVR:
kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
case SPRN_PIR:
@@ -261,13 +263,17 @@
kvmppc_set_gpr(vcpu, rt, get_tb()); break;
case SPRN_SPRG0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0);
+ break;
case SPRN_SPRG1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1);
+ break;
case SPRN_SPRG2:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2);
+ break;
case SPRN_SPRG3:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3);
+ break;
/* Note: SPRG4-7 are user-readable, so we don't get
* a trap. */
@@ -320,9 +326,11 @@
rs = get_rs(inst);
switch (sprn) {
case SPRN_SRR0:
- vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs);
+ break;
case SPRN_SRR1:
- vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs);
+ break;
/* XXX We need to context-switch the timebase for
* watchdog and FIT. */
@@ -337,13 +345,17 @@
break;
case SPRN_SPRG0:
- vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs);
+ break;
case SPRN_SPRG1:
- vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs);
+ break;
case SPRN_SPRG2:
- vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs);
+ break;
case SPRN_SPRG3:
- vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs);
+ break;
default:
emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 72a4ad8..2f87a16 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -38,9 +38,56 @@
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions);
+ return !(v->arch.shared->msr & MSR_WE) ||
+ !!(v->arch.pending_exceptions);
}
+int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
+{
+ int nr = kvmppc_get_gpr(vcpu, 11);
+ int r;
+ unsigned long __maybe_unused param1 = kvmppc_get_gpr(vcpu, 3);
+ unsigned long __maybe_unused param2 = kvmppc_get_gpr(vcpu, 4);
+ unsigned long __maybe_unused param3 = kvmppc_get_gpr(vcpu, 5);
+ unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
+ unsigned long r2 = 0;
+
+ if (!(vcpu->arch.shared->msr & MSR_SF)) {
+ /* 32 bit mode */
+ param1 &= 0xffffffff;
+ param2 &= 0xffffffff;
+ param3 &= 0xffffffff;
+ param4 &= 0xffffffff;
+ }
+
+ switch (nr) {
+ case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE:
+ {
+ vcpu->arch.magic_page_pa = param1;
+ vcpu->arch.magic_page_ea = param2;
+
+ r2 = KVM_MAGIC_FEAT_SR;
+
+ r = HC_EV_SUCCESS;
+ break;
+ }
+ case HC_VENDOR_KVM | KVM_HC_FEATURES:
+ r = HC_EV_SUCCESS;
+#if defined(CONFIG_PPC_BOOK3S) /* XXX Missing magic page on BookE */
+ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
+#endif
+
+ /* Second return value is in r4 */
+ break;
+ default:
+ r = HC_EV_UNIMPLEMENTED;
+ break;
+ }
+
+ kvmppc_set_gpr(vcpu, 4, r2);
+
+ return r;
+}
int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
@@ -145,8 +192,10 @@
case KVM_CAP_PPC_SEGSTATE:
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_UNSET_IRQ:
+ case KVM_CAP_PPC_IRQ_LEVEL:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_PPC_OSI:
+ case KVM_CAP_PPC_GET_PVINFO:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -534,16 +583,53 @@
return r;
}
+static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
+{
+ u32 inst_lis = 0x3c000000;
+ u32 inst_ori = 0x60000000;
+ u32 inst_nop = 0x60000000;
+ u32 inst_sc = 0x44000002;
+ u32 inst_imm_mask = 0xffff;
+
+ /*
+ * The hypercall to get into KVM from within guest context is as
+ * follows:
+ *
+ * lis r0, r0, KVM_SC_MAGIC_R0@h
+ * ori r0, KVM_SC_MAGIC_R0@l
+ * sc
+ * nop
+ */
+ pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask);
+ pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
+ pvinfo->hcall[2] = inst_sc;
+ pvinfo->hcall[3] = inst_nop;
+
+ return 0;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
+ void __user *argp = (void __user *)arg;
long r;
switch (ioctl) {
+ case KVM_PPC_GET_PVINFO: {
+ struct kvm_ppc_pvinfo pvinfo;
+ r = kvm_vm_ioctl_get_pvinfo(&pvinfo);
+ if (copy_to_user(argp, &pvinfo, sizeof(pvinfo))) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ break;
+ }
default:
r = -ENOTTY;
}
+out:
return r;
}
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index a8e8400..3aca1b0 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -98,6 +98,245 @@
__entry->word1, __entry->word2)
);
+
+/*************************************************************************
+ * Book3S trace points *
+ *************************************************************************/
+
+#ifdef CONFIG_PPC_BOOK3S
+
+TRACE_EVENT(kvm_book3s_exit,
+ TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+ TP_ARGS(exit_nr, vcpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, exit_nr )
+ __field( unsigned long, pc )
+ __field( unsigned long, msr )
+ __field( unsigned long, dar )
+ __field( unsigned long, srr1 )
+ ),
+
+ TP_fast_assign(
+ __entry->exit_nr = exit_nr;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->dar = kvmppc_get_fault_dar(vcpu);
+ __entry->msr = vcpu->arch.shared->msr;
+ __entry->srr1 = to_svcpu(vcpu)->shadow_srr1;
+ ),
+
+ TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
+ __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar,
+ __entry->srr1)
+);
+
+TRACE_EVENT(kvm_book3s_reenter,
+ TP_PROTO(int r, struct kvm_vcpu *vcpu),
+ TP_ARGS(r, vcpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, r )
+ __field( unsigned long, pc )
+ ),
+
+ TP_fast_assign(
+ __entry->r = r;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ ),
+
+ TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
+);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+TRACE_EVENT(kvm_book3s_64_mmu_map,
+ TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
+ struct kvmppc_pte *orig_pte),
+ TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
+
+ TP_STRUCT__entry(
+ __field( unsigned char, flag_w )
+ __field( unsigned char, flag_x )
+ __field( unsigned long, eaddr )
+ __field( unsigned long, hpteg )
+ __field( unsigned long, va )
+ __field( unsigned long long, vpage )
+ __field( unsigned long, hpaddr )
+ ),
+
+ TP_fast_assign(
+ __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
+ __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x';
+ __entry->eaddr = orig_pte->eaddr;
+ __entry->hpteg = hpteg;
+ __entry->va = va;
+ __entry->vpage = orig_pte->vpage;
+ __entry->hpaddr = hpaddr;
+ ),
+
+ TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
+ __entry->flag_w, __entry->flag_x, __entry->eaddr,
+ __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
+);
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+TRACE_EVENT(kvm_book3s_mmu_map,
+ TP_PROTO(struct hpte_cache *pte),
+ TP_ARGS(pte),
+
+ TP_STRUCT__entry(
+ __field( u64, host_va )
+ __field( u64, pfn )
+ __field( ulong, eaddr )
+ __field( u64, vpage )
+ __field( ulong, raddr )
+ __field( int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->host_va = pte->host_va;
+ __entry->pfn = pte->pfn;
+ __entry->eaddr = pte->pte.eaddr;
+ __entry->vpage = pte->pte.vpage;
+ __entry->raddr = pte->pte.raddr;
+ __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
+ (pte->pte.may_write ? 0x2 : 0) |
+ (pte->pte.may_execute ? 0x1 : 0);
+ ),
+
+ TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+ __entry->host_va, __entry->pfn, __entry->eaddr,
+ __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_invalidate,
+ TP_PROTO(struct hpte_cache *pte),
+ TP_ARGS(pte),
+
+ TP_STRUCT__entry(
+ __field( u64, host_va )
+ __field( u64, pfn )
+ __field( ulong, eaddr )
+ __field( u64, vpage )
+ __field( ulong, raddr )
+ __field( int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->host_va = pte->host_va;
+ __entry->pfn = pte->pfn;
+ __entry->eaddr = pte->pte.eaddr;
+ __entry->vpage = pte->pte.vpage;
+ __entry->raddr = pte->pte.raddr;
+ __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
+ (pte->pte.may_write ? 0x2 : 0) |
+ (pte->pte.may_execute ? 0x1 : 0);
+ ),
+
+ TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+ __entry->host_va, __entry->pfn, __entry->eaddr,
+ __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_flush,
+ TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
+ unsigned long long p2),
+ TP_ARGS(type, vcpu, p1, p2),
+
+ TP_STRUCT__entry(
+ __field( int, count )
+ __field( unsigned long long, p1 )
+ __field( unsigned long long, p2 )
+ __field( const char *, type )
+ ),
+
+ TP_fast_assign(
+ __entry->count = vcpu->arch.hpte_cache_count;
+ __entry->p1 = p1;
+ __entry->p2 = p2;
+ __entry->type = type;
+ ),
+
+ TP_printk("Flush %d %sPTEs: %llx - %llx",
+ __entry->count, __entry->type, __entry->p1, __entry->p2)
+);
+
+TRACE_EVENT(kvm_book3s_slb_found,
+ TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
+ TP_ARGS(gvsid, hvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned long long, gvsid )
+ __field( unsigned long long, hvsid )
+ ),
+
+ TP_fast_assign(
+ __entry->gvsid = gvsid;
+ __entry->hvsid = hvsid;
+ ),
+
+ TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_fail,
+ TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
+ TP_ARGS(sid_map_mask, gvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned short, sid_map_mask )
+ __field( unsigned long long, gvsid )
+ ),
+
+ TP_fast_assign(
+ __entry->sid_map_mask = sid_map_mask;
+ __entry->gvsid = gvsid;
+ ),
+
+ TP_printk("%x/%x: %llx", __entry->sid_map_mask,
+ SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_map,
+ TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
+ unsigned long long hvsid),
+ TP_ARGS(sid_map_mask, gvsid, hvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned short, sid_map_mask )
+ __field( unsigned long long, guest_vsid )
+ __field( unsigned long long, host_vsid )
+ ),
+
+ TP_fast_assign(
+ __entry->sid_map_mask = sid_map_mask;
+ __entry->guest_vsid = gvsid;
+ __entry->host_vsid = hvsid;
+ ),
+
+ TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
+ __entry->guest_vsid, __entry->host_vsid)
+);
+
+TRACE_EVENT(kvm_book3s_slbmte,
+ TP_PROTO(u64 slb_vsid, u64 slb_esid),
+ TP_ARGS(slb_vsid, slb_esid),
+
+ TP_STRUCT__entry(
+ __field( u64, slb_vsid )
+ __field( u64, slb_esid )
+ ),
+
+ TP_fast_assign(
+ __entry->slb_vsid = slb_vsid;
+ __entry->slb_esid = slb_esid;
+ ),
+
+ TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
+);
+
+#endif /* CONFIG_PPC_BOOK3S */
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 81c9208..956154f 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -21,6 +21,16 @@
source "arch/powerpc/platforms/40x/Kconfig"
source "arch/powerpc/platforms/amigaone/Kconfig"
+config KVM_GUEST
+ bool "KVM Guest support"
+ default y
+ ---help---
+ This option enables various optimizations for running under the KVM
+ hypervisor. Overhead for the kernel when not running inside KVM should
+ be minimal.
+
+ In case of doubt, say Y
+
config PPC_NATIVE
bool
depends on 6xx || PPC64
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 42e512b..287d7bb 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -5,6 +5,7 @@
header-y += cmb.h
header-y += dasd.h
header-y += debug.h
+header-y += kvm_virtio.h
header-y += monwriter.h
header-y += qeth.h
header-y += schid.h
diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h
index acdfdff..72f6141 100644
--- a/arch/s390/include/asm/kvm_virtio.h
+++ b/arch/s390/include/asm/kvm_virtio.h
@@ -54,4 +54,11 @@
* This is pagesize for historical reasons. */
#define KVM_S390_VIRTIO_RING_ALIGN 4096
+
+/* These values are supposed to be in ext_params on an interrupt */
+#define VIRTIO_PARAM_MASK 0xff
+#define VIRTIO_PARAM_VRING_INTERRUPT 0x0
+#define VIRTIO_PARAM_CONFIG_CHANGED 0x1
+#define VIRTIO_PARAM_DEV_ADD 0x2
+
#endif
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 1f99ecf..b36c6b3 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -139,6 +139,7 @@
void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu);
unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu);
void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
+ void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu);
int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu);
int (*cpl)(struct kvm_vcpu *vcpu);
@@ -156,7 +157,10 @@
unsigned long orig_val;
u64 orig_val64;
};
- unsigned long *ptr;
+ union {
+ unsigned long *reg;
+ unsigned long mem;
+ } addr;
union {
unsigned long val;
u64 val64;
@@ -190,6 +194,7 @@
bool has_seg_override;
u8 seg_override;
unsigned int d;
+ int (*execute)(struct x86_emulate_ctxt *ctxt);
unsigned long regs[NR_VCPU_REGS];
unsigned long eip;
/* modrm */
@@ -197,17 +202,16 @@
u8 modrm_mod;
u8 modrm_reg;
u8 modrm_rm;
- u8 use_modrm_ea;
+ u8 modrm_seg;
bool rip_relative;
- unsigned long modrm_ea;
- void *modrm_ptr;
- unsigned long modrm_val;
struct fetch_cache fetch;
struct read_cache io_read;
struct read_cache mem_read;
};
struct x86_emulate_ctxt {
+ struct x86_emulate_ops *ops;
+
/* Register state before/after emulation. */
struct kvm_vcpu *vcpu;
@@ -220,12 +224,11 @@
/* interruptibility state, as a result of execution of STI or MOV SS */
int interruptibility;
- bool restart; /* restart string instruction after writeback */
+ bool perm_ok; /* do not check permissions if true */
int exception; /* exception that happens during emulation or -1 */
u32 error_code; /* error code for exception */
bool error_code_valid;
- unsigned long cr2; /* faulted address in case of #PF */
/* decode cache */
struct decode_cache decode;
@@ -249,13 +252,14 @@
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
#endif
-int x86_decode_insn(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops);
-int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops);
+int x86_decode_insn(struct x86_emulate_ctxt *ctxt);
+#define EMULATION_FAILED -1
+#define EMULATION_OK 0
+#define EMULATION_RESTART 1
+int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops,
u16 tss_selector, int reason,
bool has_error_code, u32 error_code);
-
+int emulate_int_real(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops, int irq);
#endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c52e2eb..9e6fe39 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -236,10 +236,14 @@
*/
struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
+ void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
+ unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
+ void (*inject_page_fault)(struct kvm_vcpu *vcpu);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
u32 *error);
+ gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
void (*prefetch_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *page);
int (*sync_page)(struct kvm_vcpu *vcpu,
@@ -249,13 +253,18 @@
int root_level;
int shadow_root_level;
union kvm_mmu_page_role base_role;
+ bool direct_map;
u64 *pae_root;
+ u64 *lm_root;
u64 rsvd_bits_mask[2][4];
+
+ bool nx;
+
+ u64 pdptrs[4]; /* pae */
};
struct kvm_vcpu_arch {
- u64 host_tsc;
/*
* rip and regs accesses must go through
* kvm_{register,rip}_{read,write} functions.
@@ -272,7 +281,6 @@
unsigned long cr4_guest_owned_bits;
unsigned long cr8;
u32 hflags;
- u64 pdptrs[4]; /* pae */
u64 efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
@@ -282,7 +290,41 @@
u64 ia32_misc_enable_msr;
bool tpr_access_reporting;
+ /*
+ * Paging state of the vcpu
+ *
+ * If the vcpu runs in guest mode with two level paging this still saves
+ * the paging mode of the l1 guest. This context is always used to
+ * handle faults.
+ */
struct kvm_mmu mmu;
+
+ /*
+ * Paging state of an L2 guest (used for nested npt)
+ *
+ * This context will save all necessary information to walk page tables
+ * of the an L2 guest. This context is only initialized for page table
+ * walking and not for faulting since we never handle l2 page faults on
+ * the host.
+ */
+ struct kvm_mmu nested_mmu;
+
+ /*
+ * Pointer to the mmu context currently used for
+ * gva_to_gpa translations.
+ */
+ struct kvm_mmu *walk_mmu;
+
+ /*
+ * This struct is filled with the necessary information to propagate a
+ * page fault into the guest
+ */
+ struct {
+ u64 address;
+ unsigned error_code;
+ bool nested;
+ } fault;
+
/* only needed in kvm_pv_mmu_op() path, but it's hot so
* put it here to avoid allocation */
struct kvm_pv_mmu_op_buffer mmu_op_buffer;
@@ -336,9 +378,15 @@
gpa_t time;
struct pvclock_vcpu_time_info hv_clock;
- unsigned int hv_clock_tsc_khz;
+ unsigned int hw_tsc_khz;
unsigned int time_offset;
struct page *time_page;
+ u64 last_host_tsc;
+ u64 last_guest_tsc;
+ u64 last_kernel_ns;
+ u64 last_tsc_nsec;
+ u64 last_tsc_write;
+ bool tsc_catchup;
bool nmi_pending;
bool nmi_injected;
@@ -367,9 +415,9 @@
};
struct kvm_arch {
- unsigned int n_free_mmu_pages;
+ unsigned int n_used_mmu_pages;
unsigned int n_requested_mmu_pages;
- unsigned int n_alloc_mmu_pages;
+ unsigned int n_max_mmu_pages;
atomic_t invlpg_counter;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
@@ -394,8 +442,14 @@
gpa_t ept_identity_map_addr;
unsigned long irq_sources_bitmap;
- u64 vm_init_tsc;
s64 kvmclock_offset;
+ spinlock_t tsc_write_lock;
+ u64 last_tsc_nsec;
+ u64 last_tsc_offset;
+ u64 last_tsc_write;
+ u32 virtual_tsc_khz;
+ u32 virtual_tsc_mult;
+ s8 virtual_tsc_shift;
struct kvm_xen_hvm_config xen_hvm_config;
@@ -505,6 +559,7 @@
void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code,
bool reinject);
+ void (*cancel_injection)(struct kvm_vcpu *vcpu);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
@@ -517,11 +572,16 @@
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
int (*get_lpage_level)(void);
bool (*rdtscp_supported)(void);
+ void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment);
+
+ void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
bool (*has_wbinvd_exit)(void);
+ void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+
const struct trace_print_flags *exit_reasons_str;
};
@@ -544,7 +604,7 @@
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
-int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
+int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
@@ -608,8 +668,11 @@
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
-void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
- u32 error_code);
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu);
+int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gfn_t gfn, void *data, int offset, int len,
+ u32 access);
+void kvm_propagate_fault(struct kvm_vcpu *vcpu);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
int kvm_pic_set_irq(void *opaque, int irq, int level);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 05eba5e..7b562b6 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -158,6 +158,12 @@
return cpuid_eax(KVM_CPUID_FEATURES);
}
+#ifdef CONFIG_KVM_GUEST
+void __init kvm_guest_init(void);
+#else
+#define kvm_guest_init() do { } while (0)
#endif
+#endif /* __KERNEL__ */
+
#endif /* _ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 986f779..83c4bb1 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -198,6 +198,7 @@
#define MSR_IA32_TSC 0x00000010
#define MSR_IA32_PLATFORM_ID 0x00000017
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
+#define MSR_EBC_FREQUENCY_ID 0x0000002c
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define FEATURE_CONTROL_LOCKED (1<<0)
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index cd02f32..7f7e577 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -12,4 +12,42 @@
struct pvclock_vcpu_time_info *vcpu,
struct timespec *ts);
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+ u64 product;
+#ifdef __i386__
+ u32 tmp1, tmp2;
+#endif
+
+ if (shift < 0)
+ delta >>= -shift;
+ else
+ delta <<= shift;
+
+#ifdef __i386__
+ __asm__ (
+ "mul %5 ; "
+ "mov %4,%%eax ; "
+ "mov %%edx,%4 ; "
+ "mul %5 ; "
+ "xor %5,%5 ; "
+ "add %4,%%eax ; "
+ "adc %5,%%edx ; "
+ : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+ : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif defined(__x86_64__)
+ __asm__ (
+ "mul %%rdx ; shrd $32,%%rdx,%%rax"
+ : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+ return product;
+}
+
#endif /* _ASM_X86_PVCLOCK_H */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index eb9b76c..ca43ce3 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,13 +128,15 @@
static int kvm_register_clock(char *txt)
{
int cpu = smp_processor_id();
- int low, high;
+ int low, high, ret;
+
low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
+ ret = native_write_msr_safe(msr_kvm_system_time, low, high);
printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
cpu, high, low, txt);
- return native_write_msr_safe(msr_kvm_system_time, low, high);
+ return ret;
}
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 239427c..bab3b9e 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -82,7 +82,8 @@
static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
{
u64 delta = native_read_tsc() - shadow->tsc_timestamp;
- return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+ return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
+ shadow->tsc_shift);
}
/*
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 970bbd4..ddc131f 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -64,6 +64,13 @@
To compile this as a module, choose M here: the module
will be called kvm-amd.
+config KVM_MMU_AUDIT
+ bool "Audit KVM MMU"
+ depends on KVM && TRACEPOINTS
+ ---help---
+ This option adds a R/W kVM module parameter 'mmu_audit', which allows
+ audit KVM MMU at runtime.
+
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/vhost/Kconfig
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 66ca98a..38b6e8d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -9,7 +9,7 @@
* privileged instructions:
*
* Copyright (C) 2006 Qumranet
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Avi Kivity <avi@qumranet.com>
* Yaniv Kamay <yaniv@qumranet.com>
@@ -51,13 +51,13 @@
#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
#define DstReg (2<<1) /* Register operand. */
#define DstMem (3<<1) /* Memory operand. */
-#define DstAcc (4<<1) /* Destination Accumulator */
+#define DstAcc (4<<1) /* Destination Accumulator */
#define DstDI (5<<1) /* Destination is in ES:(E)DI */
#define DstMem64 (6<<1) /* 64bit memory operand */
+#define DstImmUByte (7<<1) /* 8-bit unsigned immediate operand */
#define DstMask (7<<1)
/* Source operand type. */
#define SrcNone (0<<4) /* No source operand. */
-#define SrcImplicit (0<<4) /* Source operand is implicit in the opcode. */
#define SrcReg (1<<4) /* Register operand. */
#define SrcMem (2<<4) /* Memory operand. */
#define SrcMem16 (3<<4) /* Memory operand (16-bit). */
@@ -71,6 +71,7 @@
#define SrcImmFAddr (0xb<<4) /* Source is immediate far address */
#define SrcMemFAddr (0xc<<4) /* Source is far address in memory */
#define SrcAcc (0xd<<4) /* Source Accumulator */
+#define SrcImmU16 (0xe<<4) /* Immediate operand, unsigned, 16 bits */
#define SrcMask (0xf<<4)
/* Generic ModRM decode. */
#define ModRM (1<<8)
@@ -82,8 +83,10 @@
#define Stack (1<<13) /* Stack instruction (push/pop) */
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
-#define GroupMask 0xff /* Group number stored in bits 0:7 */
/* Misc flags */
+#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
+#define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
+#define Undefined (1<<25) /* No Such Instruction */
#define Lock (1<<26) /* lock prefix is allowed for the instruction */
#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64 (1<<28)
@@ -92,285 +95,30 @@
#define Src2CL (1<<29)
#define Src2ImmByte (2<<29)
#define Src2One (3<<29)
+#define Src2Imm (4<<29)
#define Src2Mask (7<<29)
-enum {
- Group1_80, Group1_81, Group1_82, Group1_83,
- Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
- Group8, Group9,
+#define X2(x...) x, x
+#define X3(x...) X2(x), x
+#define X4(x...) X2(x), X2(x)
+#define X5(x...) X4(x), x
+#define X6(x...) X4(x), X2(x)
+#define X7(x...) X4(x), X3(x)
+#define X8(x...) X4(x), X4(x)
+#define X16(x...) X8(x), X8(x)
+
+struct opcode {
+ u32 flags;
+ union {
+ int (*execute)(struct x86_emulate_ctxt *ctxt);
+ struct opcode *group;
+ struct group_dual *gdual;
+ } u;
};
-static u32 opcode_table[256] = {
- /* 0x00 - 0x07 */
- ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
- ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
- ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
- /* 0x08 - 0x0F */
- ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
- ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
- ImplicitOps | Stack | No64, 0,
- /* 0x10 - 0x17 */
- ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
- ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
- ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
- /* 0x18 - 0x1F */
- ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
- ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
- ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
- /* 0x20 - 0x27 */
- ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
- ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
- /* 0x28 - 0x2F */
- ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
- ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
- /* 0x30 - 0x37 */
- ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
- ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
- /* 0x38 - 0x3F */
- ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
- ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
- 0, 0,
- /* 0x40 - 0x47 */
- DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
- /* 0x48 - 0x4F */
- DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
- /* 0x50 - 0x57 */
- SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
- SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
- /* 0x58 - 0x5F */
- DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
- DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
- /* 0x60 - 0x67 */
- ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
- 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
- 0, 0, 0, 0,
- /* 0x68 - 0x6F */
- SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
- DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */
- SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */
- /* 0x70 - 0x77 */
- SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
- SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
- /* 0x78 - 0x7F */
- SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
- SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
- /* 0x80 - 0x87 */
- Group | Group1_80, Group | Group1_81,
- Group | Group1_82, Group | Group1_83,
- ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
- ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
- /* 0x88 - 0x8F */
- ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
- ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
- DstMem | SrcNone | ModRM | Mov, ModRM | DstReg,
- ImplicitOps | SrcMem16 | ModRM, Group | Group1A,
- /* 0x90 - 0x97 */
- DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
- /* 0x98 - 0x9F */
- 0, 0, SrcImmFAddr | No64, 0,
- ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
- /* 0xA0 - 0xA7 */
- ByteOp | DstAcc | SrcMem | Mov | MemAbs, DstAcc | SrcMem | Mov | MemAbs,
- ByteOp | DstMem | SrcAcc | Mov | MemAbs, DstMem | SrcAcc | Mov | MemAbs,
- ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String,
- ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String,
- /* 0xA8 - 0xAF */
- DstAcc | SrcImmByte | ByteOp, DstAcc | SrcImm, ByteOp | DstDI | Mov | String, DstDI | Mov | String,
- ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String,
- ByteOp | DstDI | String, DstDI | String,
- /* 0xB0 - 0xB7 */
- ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
- ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
- ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
- ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
- /* 0xB8 - 0xBF */
- DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
- DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
- DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
- DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
- /* 0xC0 - 0xC7 */
- ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
- 0, ImplicitOps | Stack, 0, 0,
- ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
- /* 0xC8 - 0xCF */
- 0, 0, 0, ImplicitOps | Stack,
- ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
- /* 0xD0 - 0xD7 */
- ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
- ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
- 0, 0, 0, 0,
- /* 0xD8 - 0xDF */
- 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0xE0 - 0xE7 */
- 0, 0, 0, 0,
- ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
- ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
- /* 0xE8 - 0xEF */
- SrcImm | Stack, SrcImm | ImplicitOps,
- SrcImmFAddr | No64, SrcImmByte | ImplicitOps,
- SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
- SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
- /* 0xF0 - 0xF7 */
- 0, 0, 0, 0,
- ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
- /* 0xF8 - 0xFF */
- ImplicitOps, 0, ImplicitOps, ImplicitOps,
- ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
-};
-
-static u32 twobyte_table[256] = {
- /* 0x00 - 0x0F */
- 0, Group | GroupDual | Group7, 0, 0,
- 0, ImplicitOps, ImplicitOps | Priv, 0,
- ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
- 0, ImplicitOps | ModRM, 0, 0,
- /* 0x10 - 0x1F */
- 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
- /* 0x20 - 0x2F */
- ModRM | ImplicitOps | Priv, ModRM | Priv,
- ModRM | ImplicitOps | Priv, ModRM | Priv,
- 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0x30 - 0x3F */
- ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
- ImplicitOps, ImplicitOps | Priv, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0x40 - 0x47 */
- DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
- DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
- DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
- DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
- /* 0x48 - 0x4F */
- DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
- DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
- DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
- DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
- /* 0x50 - 0x5F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0x60 - 0x6F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0x70 - 0x7F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0x80 - 0x8F */
- SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
- SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
- /* 0x90 - 0x9F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0xA0 - 0xA7 */
- ImplicitOps | Stack, ImplicitOps | Stack,
- 0, DstMem | SrcReg | ModRM | BitOp,
- DstMem | SrcReg | Src2ImmByte | ModRM,
- DstMem | SrcReg | Src2CL | ModRM, 0, 0,
- /* 0xA8 - 0xAF */
- ImplicitOps | Stack, ImplicitOps | Stack,
- 0, DstMem | SrcReg | ModRM | BitOp | Lock,
- DstMem | SrcReg | Src2ImmByte | ModRM,
- DstMem | SrcReg | Src2CL | ModRM,
- ModRM, 0,
- /* 0xB0 - 0xB7 */
- ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
- 0, DstMem | SrcReg | ModRM | BitOp | Lock,
- 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
- DstReg | SrcMem16 | ModRM | Mov,
- /* 0xB8 - 0xBF */
- 0, 0,
- Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
- 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
- DstReg | SrcMem16 | ModRM | Mov,
- /* 0xC0 - 0xCF */
- 0, 0, 0, DstMem | SrcReg | ModRM | Mov,
- 0, 0, 0, Group | GroupDual | Group9,
- 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0xD0 - 0xDF */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0xE0 - 0xEF */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 0xF0 - 0xFF */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-static u32 group_table[] = {
- [Group1_80*8] =
- ByteOp | DstMem | SrcImm | ModRM | Lock,
- ByteOp | DstMem | SrcImm | ModRM | Lock,
- ByteOp | DstMem | SrcImm | ModRM | Lock,
- ByteOp | DstMem | SrcImm | ModRM | Lock,
- ByteOp | DstMem | SrcImm | ModRM | Lock,
- ByteOp | DstMem | SrcImm | ModRM | Lock,
- ByteOp | DstMem | SrcImm | ModRM | Lock,
- ByteOp | DstMem | SrcImm | ModRM,
- [Group1_81*8] =
- DstMem | SrcImm | ModRM | Lock,
- DstMem | SrcImm | ModRM | Lock,
- DstMem | SrcImm | ModRM | Lock,
- DstMem | SrcImm | ModRM | Lock,
- DstMem | SrcImm | ModRM | Lock,
- DstMem | SrcImm | ModRM | Lock,
- DstMem | SrcImm | ModRM | Lock,
- DstMem | SrcImm | ModRM,
- [Group1_82*8] =
- ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
- ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
- ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
- ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
- ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
- ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
- ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
- ByteOp | DstMem | SrcImm | ModRM | No64,
- [Group1_83*8] =
- DstMem | SrcImmByte | ModRM | Lock,
- DstMem | SrcImmByte | ModRM | Lock,
- DstMem | SrcImmByte | ModRM | Lock,
- DstMem | SrcImmByte | ModRM | Lock,
- DstMem | SrcImmByte | ModRM | Lock,
- DstMem | SrcImmByte | ModRM | Lock,
- DstMem | SrcImmByte | ModRM | Lock,
- DstMem | SrcImmByte | ModRM,
- [Group1A*8] =
- DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
- [Group3_Byte*8] =
- ByteOp | SrcImm | DstMem | ModRM, ByteOp | SrcImm | DstMem | ModRM,
- ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
- 0, 0, 0, 0,
- [Group3*8] =
- DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
- DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
- 0, 0, 0, 0,
- [Group4*8] =
- ByteOp | DstMem | SrcNone | ModRM | Lock, ByteOp | DstMem | SrcNone | ModRM | Lock,
- 0, 0, 0, 0, 0, 0,
- [Group5*8] =
- DstMem | SrcNone | ModRM | Lock, DstMem | SrcNone | ModRM | Lock,
- SrcMem | ModRM | Stack, 0,
- SrcMem | ModRM | Stack, SrcMemFAddr | ModRM | ImplicitOps,
- SrcMem | ModRM | Stack, 0,
- [Group7*8] =
- 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
- SrcNone | ModRM | DstMem | Mov, 0,
- SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
- [Group8*8] =
- 0, 0, 0, 0,
- DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
- DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
- [Group9*8] =
- 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0,
-};
-
-static u32 group2_table[] = {
- [Group7*8] =
- SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv,
- SrcNone | ModRM | DstMem | Mov, 0,
- SrcMem16 | ModRM | Mov | Priv, 0,
- [Group9*8] =
- 0, 0, 0, 0, 0, 0, 0, 0,
+struct group_dual {
+ struct opcode mod012[8];
+ struct opcode mod3[8];
};
/* EFLAGS bit definitions. */
@@ -392,6 +140,9 @@
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)
+#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
+#define EFLG_RESERVED_ONE_MASK 2
+
/*
* Instruction emulation:
* Most instructions are emulated directly via a fragment of inline assembly
@@ -444,13 +195,13 @@
#define ON64(x)
#endif
-#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \
+#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix, _dsttype) \
do { \
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "4", "2") \
_op _suffix " %"_x"3,%1; " \
_POST_EFLAGS("0", "4", "2") \
- : "=m" (_eflags), "=m" ((_dst).val), \
+ : "=m" (_eflags), "+q" (*(_dsttype*)&(_dst).val),\
"=&r" (_tmp) \
: _y ((_src).val), "i" (EFLAGS_MASK)); \
} while (0)
@@ -463,13 +214,13 @@
\
switch ((_dst).bytes) { \
case 2: \
- ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
+ ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w",u16);\
break; \
case 4: \
- ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
+ ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l",u32);\
break; \
case 8: \
- ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
+ ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q",u64)); \
break; \
} \
} while (0)
@@ -479,7 +230,7 @@
unsigned long _tmp; \
switch ((_dst).bytes) { \
case 1: \
- ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
+ ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b",u8); \
break; \
default: \
__emulate_2op_nobyte(_op, _src, _dst, _eflags, \
@@ -566,6 +317,74 @@
} \
} while (0)
+#define __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, _suffix) \
+ do { \
+ unsigned long _tmp; \
+ \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "4", "1") \
+ _op _suffix " %5; " \
+ _POST_EFLAGS("0", "4", "1") \
+ : "=m" (_eflags), "=&r" (_tmp), \
+ "+a" (_rax), "+d" (_rdx) \
+ : "i" (EFLAGS_MASK), "m" ((_src).val), \
+ "a" (_rax), "d" (_rdx)); \
+ } while (0)
+
+#define __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _suffix, _ex) \
+ do { \
+ unsigned long _tmp; \
+ \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "5", "1") \
+ "1: \n\t" \
+ _op _suffix " %6; " \
+ "2: \n\t" \
+ _POST_EFLAGS("0", "5", "1") \
+ ".pushsection .fixup,\"ax\" \n\t" \
+ "3: movb $1, %4 \n\t" \
+ "jmp 2b \n\t" \
+ ".popsection \n\t" \
+ _ASM_EXTABLE(1b, 3b) \
+ : "=m" (_eflags), "=&r" (_tmp), \
+ "+a" (_rax), "+d" (_rdx), "+qm"(_ex) \
+ : "i" (EFLAGS_MASK), "m" ((_src).val), \
+ "a" (_rax), "d" (_rdx)); \
+ } while (0)
+
+/* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */
+#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \
+ do { \
+ switch((_src).bytes) { \
+ case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \
+ case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "w"); break; \
+ case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \
+ case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \
+ } \
+ } while (0)
+
+#define emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _ex) \
+ do { \
+ switch((_src).bytes) { \
+ case 1: \
+ __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
+ _eflags, "b", _ex); \
+ break; \
+ case 2: \
+ __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
+ _eflags, "w", _ex); \
+ break; \
+ case 4: \
+ __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
+ _eflags, "l", _ex); \
+ break; \
+ case 8: ON64( \
+ __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
+ _eflags, "q", _ex)); \
+ break; \
+ } \
+ } while (0)
+
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _size, _eip) \
({ unsigned long _x; \
@@ -661,7 +480,6 @@
ctxt->exception = vec;
ctxt->error_code = error;
ctxt->error_code_valid = valid;
- ctxt->restart = false;
}
static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
@@ -669,11 +487,9 @@
emulate_exception(ctxt, GP_VECTOR, err, true);
}
-static void emulate_pf(struct x86_emulate_ctxt *ctxt, unsigned long addr,
- int err)
+static void emulate_pf(struct x86_emulate_ctxt *ctxt)
{
- ctxt->cr2 = addr;
- emulate_exception(ctxt, PF_VECTOR, err, true);
+ emulate_exception(ctxt, PF_VECTOR, 0, true);
}
static void emulate_ud(struct x86_emulate_ctxt *ctxt)
@@ -686,6 +502,12 @@
emulate_exception(ctxt, TS_VECTOR, err, true);
}
+static int emulate_de(struct x86_emulate_ctxt *ctxt)
+{
+ emulate_exception(ctxt, DE_VECTOR, 0, false);
+ return X86EMUL_PROPAGATE_FAULT;
+}
+
static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
unsigned long eip, u8 *dest)
@@ -742,7 +564,7 @@
static int read_descriptor(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
- void *ptr,
+ ulong addr,
u16 *size, unsigned long *address, int op_bytes)
{
int rc;
@@ -750,12 +572,10 @@
if (op_bytes == 2)
op_bytes = 3;
*address = 0;
- rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
- ctxt->vcpu, NULL);
+ rc = ops->read_std(addr, (unsigned long *)size, 2, ctxt->vcpu, NULL);
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
- ctxt->vcpu, NULL);
+ rc = ops->read_std(addr + 2, address, op_bytes, ctxt->vcpu, NULL);
return rc;
}
@@ -794,6 +614,24 @@
return (!!rc ^ (condition & 1));
}
+static void fetch_register_operand(struct operand *op)
+{
+ switch (op->bytes) {
+ case 1:
+ op->val = *(u8 *)op->addr.reg;
+ break;
+ case 2:
+ op->val = *(u16 *)op->addr.reg;
+ break;
+ case 4:
+ op->val = *(u32 *)op->addr.reg;
+ break;
+ case 8:
+ op->val = *(u64 *)op->addr.reg;
+ break;
+ }
+}
+
static void decode_register_operand(struct operand *op,
struct decode_cache *c,
int inhibit_bytereg)
@@ -805,34 +643,25 @@
reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
op->type = OP_REG;
if ((c->d & ByteOp) && !inhibit_bytereg) {
- op->ptr = decode_register(reg, c->regs, highbyte_regs);
- op->val = *(u8 *)op->ptr;
+ op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
op->bytes = 1;
} else {
- op->ptr = decode_register(reg, c->regs, 0);
+ op->addr.reg = decode_register(reg, c->regs, 0);
op->bytes = c->op_bytes;
- switch (op->bytes) {
- case 2:
- op->val = *(u16 *)op->ptr;
- break;
- case 4:
- op->val = *(u32 *)op->ptr;
- break;
- case 8:
- op->val = *(u64 *) op->ptr;
- break;
- }
}
+ fetch_register_operand(op);
op->orig_val = op->val;
}
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+ struct x86_emulate_ops *ops,
+ struct operand *op)
{
struct decode_cache *c = &ctxt->decode;
u8 sib;
int index_reg = 0, base_reg = 0, scale;
int rc = X86EMUL_CONTINUE;
+ ulong modrm_ea = 0;
if (c->rex_prefix) {
c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */
@@ -844,16 +673,19 @@
c->modrm_mod |= (c->modrm & 0xc0) >> 6;
c->modrm_reg |= (c->modrm & 0x38) >> 3;
c->modrm_rm |= (c->modrm & 0x07);
- c->modrm_ea = 0;
- c->use_modrm_ea = 1;
+ c->modrm_seg = VCPU_SREG_DS;
if (c->modrm_mod == 3) {
- c->modrm_ptr = decode_register(c->modrm_rm,
+ op->type = OP_REG;
+ op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+ op->addr.reg = decode_register(c->modrm_rm,
c->regs, c->d & ByteOp);
- c->modrm_val = *(unsigned long *)c->modrm_ptr;
+ fetch_register_operand(op);
return rc;
}
+ op->type = OP_MEM;
+
if (c->ad_bytes == 2) {
unsigned bx = c->regs[VCPU_REGS_RBX];
unsigned bp = c->regs[VCPU_REGS_RBP];
@@ -864,47 +696,46 @@
switch (c->modrm_mod) {
case 0:
if (c->modrm_rm == 6)
- c->modrm_ea += insn_fetch(u16, 2, c->eip);
+ modrm_ea += insn_fetch(u16, 2, c->eip);
break;
case 1:
- c->modrm_ea += insn_fetch(s8, 1, c->eip);
+ modrm_ea += insn_fetch(s8, 1, c->eip);
break;
case 2:
- c->modrm_ea += insn_fetch(u16, 2, c->eip);
+ modrm_ea += insn_fetch(u16, 2, c->eip);
break;
}
switch (c->modrm_rm) {
case 0:
- c->modrm_ea += bx + si;
+ modrm_ea += bx + si;
break;
case 1:
- c->modrm_ea += bx + di;
+ modrm_ea += bx + di;
break;
case 2:
- c->modrm_ea += bp + si;
+ modrm_ea += bp + si;
break;
case 3:
- c->modrm_ea += bp + di;
+ modrm_ea += bp + di;
break;
case 4:
- c->modrm_ea += si;
+ modrm_ea += si;
break;
case 5:
- c->modrm_ea += di;
+ modrm_ea += di;
break;
case 6:
if (c->modrm_mod != 0)
- c->modrm_ea += bp;
+ modrm_ea += bp;
break;
case 7:
- c->modrm_ea += bx;
+ modrm_ea += bx;
break;
}
if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
(c->modrm_rm == 6 && c->modrm_mod != 0))
- if (!c->has_seg_override)
- set_seg_override(c, VCPU_SREG_SS);
- c->modrm_ea = (u16)c->modrm_ea;
+ c->modrm_seg = VCPU_SREG_SS;
+ modrm_ea = (u16)modrm_ea;
} else {
/* 32/64-bit ModR/M decode. */
if ((c->modrm_rm & 7) == 4) {
@@ -914,410 +745,74 @@
scale = sib >> 6;
if ((base_reg & 7) == 5 && c->modrm_mod == 0)
- c->modrm_ea += insn_fetch(s32, 4, c->eip);
+ modrm_ea += insn_fetch(s32, 4, c->eip);
else
- c->modrm_ea += c->regs[base_reg];
+ modrm_ea += c->regs[base_reg];
if (index_reg != 4)
- c->modrm_ea += c->regs[index_reg] << scale;
+ modrm_ea += c->regs[index_reg] << scale;
} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
if (ctxt->mode == X86EMUL_MODE_PROT64)
c->rip_relative = 1;
} else
- c->modrm_ea += c->regs[c->modrm_rm];
+ modrm_ea += c->regs[c->modrm_rm];
switch (c->modrm_mod) {
case 0:
if (c->modrm_rm == 5)
- c->modrm_ea += insn_fetch(s32, 4, c->eip);
+ modrm_ea += insn_fetch(s32, 4, c->eip);
break;
case 1:
- c->modrm_ea += insn_fetch(s8, 1, c->eip);
+ modrm_ea += insn_fetch(s8, 1, c->eip);
break;
case 2:
- c->modrm_ea += insn_fetch(s32, 4, c->eip);
+ modrm_ea += insn_fetch(s32, 4, c->eip);
break;
}
}
+ op->addr.mem = modrm_ea;
done:
return rc;
}
static int decode_abs(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+ struct x86_emulate_ops *ops,
+ struct operand *op)
{
struct decode_cache *c = &ctxt->decode;
int rc = X86EMUL_CONTINUE;
+ op->type = OP_MEM;
switch (c->ad_bytes) {
case 2:
- c->modrm_ea = insn_fetch(u16, 2, c->eip);
+ op->addr.mem = insn_fetch(u16, 2, c->eip);
break;
case 4:
- c->modrm_ea = insn_fetch(u32, 4, c->eip);
+ op->addr.mem = insn_fetch(u32, 4, c->eip);
break;
case 8:
- c->modrm_ea = insn_fetch(u64, 8, c->eip);
+ op->addr.mem = insn_fetch(u64, 8, c->eip);
break;
}
done:
return rc;
}
-int
-x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
+static void fetch_bit_operand(struct decode_cache *c)
{
- struct decode_cache *c = &ctxt->decode;
- int rc = X86EMUL_CONTINUE;
- int mode = ctxt->mode;
- int def_op_bytes, def_ad_bytes, group;
+ long sv = 0, mask;
+ if (c->dst.type == OP_MEM && c->src.type == OP_REG) {
+ mask = ~(c->dst.bytes * 8 - 1);
- /* we cannot decode insn before we complete previous rep insn */
- WARN_ON(ctxt->restart);
+ if (c->src.bytes == 2)
+ sv = (s16)c->src.val & (s16)mask;
+ else if (c->src.bytes == 4)
+ sv = (s32)c->src.val & (s32)mask;
- c->eip = ctxt->eip;
- c->fetch.start = c->fetch.end = c->eip;
- ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);
-
- switch (mode) {
- case X86EMUL_MODE_REAL:
- case X86EMUL_MODE_VM86:
- case X86EMUL_MODE_PROT16:
- def_op_bytes = def_ad_bytes = 2;
- break;
- case X86EMUL_MODE_PROT32:
- def_op_bytes = def_ad_bytes = 4;
- break;
-#ifdef CONFIG_X86_64
- case X86EMUL_MODE_PROT64:
- def_op_bytes = 4;
- def_ad_bytes = 8;
- break;
-#endif
- default:
- return -1;
+ c->dst.addr.mem += (sv >> 3);
}
- c->op_bytes = def_op_bytes;
- c->ad_bytes = def_ad_bytes;
-
- /* Legacy prefixes. */
- for (;;) {
- switch (c->b = insn_fetch(u8, 1, c->eip)) {
- case 0x66: /* operand-size override */
- /* switch between 2/4 bytes */
- c->op_bytes = def_op_bytes ^ 6;
- break;
- case 0x67: /* address-size override */
- if (mode == X86EMUL_MODE_PROT64)
- /* switch between 4/8 bytes */
- c->ad_bytes = def_ad_bytes ^ 12;
- else
- /* switch between 2/4 bytes */
- c->ad_bytes = def_ad_bytes ^ 6;
- break;
- case 0x26: /* ES override */
- case 0x2e: /* CS override */
- case 0x36: /* SS override */
- case 0x3e: /* DS override */
- set_seg_override(c, (c->b >> 3) & 3);
- break;
- case 0x64: /* FS override */
- case 0x65: /* GS override */
- set_seg_override(c, c->b & 7);
- break;
- case 0x40 ... 0x4f: /* REX */
- if (mode != X86EMUL_MODE_PROT64)
- goto done_prefixes;
- c->rex_prefix = c->b;
- continue;
- case 0xf0: /* LOCK */
- c->lock_prefix = 1;
- break;
- case 0xf2: /* REPNE/REPNZ */
- c->rep_prefix = REPNE_PREFIX;
- break;
- case 0xf3: /* REP/REPE/REPZ */
- c->rep_prefix = REPE_PREFIX;
- break;
- default:
- goto done_prefixes;
- }
-
- /* Any legacy prefix after a REX prefix nullifies its effect. */
-
- c->rex_prefix = 0;
- }
-
-done_prefixes:
-
- /* REX prefix. */
- if (c->rex_prefix)
- if (c->rex_prefix & 8)
- c->op_bytes = 8; /* REX.W */
-
- /* Opcode byte(s). */
- c->d = opcode_table[c->b];
- if (c->d == 0) {
- /* Two-byte opcode? */
- if (c->b == 0x0f) {
- c->twobyte = 1;
- c->b = insn_fetch(u8, 1, c->eip);
- c->d = twobyte_table[c->b];
- }
- }
-
- if (c->d & Group) {
- group = c->d & GroupMask;
- c->modrm = insn_fetch(u8, 1, c->eip);
- --c->eip;
-
- group = (group << 3) + ((c->modrm >> 3) & 7);
- if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
- c->d = group2_table[group];
- else
- c->d = group_table[group];
- }
-
- /* Unrecognised? */
- if (c->d == 0) {
- DPRINTF("Cannot emulate %02x\n", c->b);
- return -1;
- }
-
- if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
- c->op_bytes = 8;
-
- /* ModRM and SIB bytes. */
- if (c->d & ModRM)
- rc = decode_modrm(ctxt, ops);
- else if (c->d & MemAbs)
- rc = decode_abs(ctxt, ops);
- if (rc != X86EMUL_CONTINUE)
- goto done;
-
- if (!c->has_seg_override)
- set_seg_override(c, VCPU_SREG_DS);
-
- if (!(!c->twobyte && c->b == 0x8d))
- c->modrm_ea += seg_override_base(ctxt, ops, c);
-
- if (c->ad_bytes != 8)
- c->modrm_ea = (u32)c->modrm_ea;
-
- if (c->rip_relative)
- c->modrm_ea += c->eip;
-
- /*
- * Decode and fetch the source operand: register, memory
- * or immediate.
- */
- switch (c->d & SrcMask) {
- case SrcNone:
- break;
- case SrcReg:
- decode_register_operand(&c->src, c, 0);
- break;
- case SrcMem16:
- c->src.bytes = 2;
- goto srcmem_common;
- case SrcMem32:
- c->src.bytes = 4;
- goto srcmem_common;
- case SrcMem:
- c->src.bytes = (c->d & ByteOp) ? 1 :
- c->op_bytes;
- /* Don't fetch the address for invlpg: it could be unmapped. */
- if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
- break;
- srcmem_common:
- /*
- * For instructions with a ModR/M byte, switch to register
- * access if Mod = 3.
- */
- if ((c->d & ModRM) && c->modrm_mod == 3) {
- c->src.type = OP_REG;
- c->src.val = c->modrm_val;
- c->src.ptr = c->modrm_ptr;
- break;
- }
- c->src.type = OP_MEM;
- c->src.ptr = (unsigned long *)c->modrm_ea;
- c->src.val = 0;
- break;
- case SrcImm:
- case SrcImmU:
- c->src.type = OP_IMM;
- c->src.ptr = (unsigned long *)c->eip;
- c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- if (c->src.bytes == 8)
- c->src.bytes = 4;
- /* NB. Immediates are sign-extended as necessary. */
- switch (c->src.bytes) {
- case 1:
- c->src.val = insn_fetch(s8, 1, c->eip);
- break;
- case 2:
- c->src.val = insn_fetch(s16, 2, c->eip);
- break;
- case 4:
- c->src.val = insn_fetch(s32, 4, c->eip);
- break;
- }
- if ((c->d & SrcMask) == SrcImmU) {
- switch (c->src.bytes) {
- case 1:
- c->src.val &= 0xff;
- break;
- case 2:
- c->src.val &= 0xffff;
- break;
- case 4:
- c->src.val &= 0xffffffff;
- break;
- }
- }
- break;
- case SrcImmByte:
- case SrcImmUByte:
- c->src.type = OP_IMM;
- c->src.ptr = (unsigned long *)c->eip;
- c->src.bytes = 1;
- if ((c->d & SrcMask) == SrcImmByte)
- c->src.val = insn_fetch(s8, 1, c->eip);
- else
- c->src.val = insn_fetch(u8, 1, c->eip);
- break;
- case SrcAcc:
- c->src.type = OP_REG;
- c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- c->src.ptr = &c->regs[VCPU_REGS_RAX];
- switch (c->src.bytes) {
- case 1:
- c->src.val = *(u8 *)c->src.ptr;
- break;
- case 2:
- c->src.val = *(u16 *)c->src.ptr;
- break;
- case 4:
- c->src.val = *(u32 *)c->src.ptr;
- break;
- case 8:
- c->src.val = *(u64 *)c->src.ptr;
- break;
- }
- break;
- case SrcOne:
- c->src.bytes = 1;
- c->src.val = 1;
- break;
- case SrcSI:
- c->src.type = OP_MEM;
- c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- c->src.ptr = (unsigned long *)
- register_address(c, seg_override_base(ctxt, ops, c),
- c->regs[VCPU_REGS_RSI]);
- c->src.val = 0;
- break;
- case SrcImmFAddr:
- c->src.type = OP_IMM;
- c->src.ptr = (unsigned long *)c->eip;
- c->src.bytes = c->op_bytes + 2;
- insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip);
- break;
- case SrcMemFAddr:
- c->src.type = OP_MEM;
- c->src.ptr = (unsigned long *)c->modrm_ea;
- c->src.bytes = c->op_bytes + 2;
- break;
- }
-
- /*
- * Decode and fetch the second source operand: register, memory
- * or immediate.
- */
- switch (c->d & Src2Mask) {
- case Src2None:
- break;
- case Src2CL:
- c->src2.bytes = 1;
- c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
- break;
- case Src2ImmByte:
- c->src2.type = OP_IMM;
- c->src2.ptr = (unsigned long *)c->eip;
- c->src2.bytes = 1;
- c->src2.val = insn_fetch(u8, 1, c->eip);
- break;
- case Src2One:
- c->src2.bytes = 1;
- c->src2.val = 1;
- break;
- }
-
- /* Decode and fetch the destination operand: register or memory. */
- switch (c->d & DstMask) {
- case ImplicitOps:
- /* Special instructions do their own operand decoding. */
- return 0;
- case DstReg:
- decode_register_operand(&c->dst, c,
- c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
- break;
- case DstMem:
- case DstMem64:
- if ((c->d & ModRM) && c->modrm_mod == 3) {
- c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- c->dst.type = OP_REG;
- c->dst.val = c->dst.orig_val = c->modrm_val;
- c->dst.ptr = c->modrm_ptr;
- break;
- }
- c->dst.type = OP_MEM;
- c->dst.ptr = (unsigned long *)c->modrm_ea;
- if ((c->d & DstMask) == DstMem64)
- c->dst.bytes = 8;
- else
- c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- c->dst.val = 0;
- if (c->d & BitOp) {
- unsigned long mask = ~(c->dst.bytes * 8 - 1);
-
- c->dst.ptr = (void *)c->dst.ptr +
- (c->src.val & mask) / 8;
- }
- break;
- case DstAcc:
- c->dst.type = OP_REG;
- c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- c->dst.ptr = &c->regs[VCPU_REGS_RAX];
- switch (c->dst.bytes) {
- case 1:
- c->dst.val = *(u8 *)c->dst.ptr;
- break;
- case 2:
- c->dst.val = *(u16 *)c->dst.ptr;
- break;
- case 4:
- c->dst.val = *(u32 *)c->dst.ptr;
- break;
- case 8:
- c->dst.val = *(u64 *)c->dst.ptr;
- break;
- }
- c->dst.orig_val = c->dst.val;
- break;
- case DstDI:
- c->dst.type = OP_MEM;
- c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- c->dst.ptr = (unsigned long *)
- register_address(c, es_base(ctxt, ops),
- c->regs[VCPU_REGS_RDI]);
- c->dst.val = 0;
- break;
- }
-
-done:
- return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
+ /* only subword offset */
+ c->src.val &= (c->dst.bytes << 3) - 1;
}
static int read_emulated(struct x86_emulate_ctxt *ctxt,
@@ -1337,7 +832,7 @@
rc = ops->read_emulated(addr, mc->data + mc->end, n, &err,
ctxt->vcpu);
if (rc == X86EMUL_PROPAGATE_FAULT)
- emulate_pf(ctxt, addr, err);
+ emulate_pf(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
mc->end += n;
@@ -1424,7 +919,7 @@
addr = dt.address + index * 8;
ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
if (ret == X86EMUL_PROPAGATE_FAULT)
- emulate_pf(ctxt, addr, err);
+ emulate_pf(ctxt);
return ret;
}
@@ -1450,7 +945,7 @@
addr = dt.address + index * 8;
ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
if (ret == X86EMUL_PROPAGATE_FAULT)
- emulate_pf(ctxt, addr, err);
+ emulate_pf(ctxt);
return ret;
}
@@ -1573,6 +1068,25 @@
return X86EMUL_PROPAGATE_FAULT;
}
+static void write_register_operand(struct operand *op)
+{
+ /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
+ switch (op->bytes) {
+ case 1:
+ *(u8 *)op->addr.reg = (u8)op->val;
+ break;
+ case 2:
+ *(u16 *)op->addr.reg = (u16)op->val;
+ break;
+ case 4:
+ *op->addr.reg = (u32)op->val;
+ break; /* 64b: zero-extend */
+ case 8:
+ *op->addr.reg = op->val;
+ break;
+ }
+}
+
static inline int writeback(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
@@ -1582,28 +1096,12 @@
switch (c->dst.type) {
case OP_REG:
- /* The 4-byte case *is* correct:
- * in 64-bit mode we zero-extend.
- */
- switch (c->dst.bytes) {
- case 1:
- *(u8 *)c->dst.ptr = (u8)c->dst.val;
- break;
- case 2:
- *(u16 *)c->dst.ptr = (u16)c->dst.val;
- break;
- case 4:
- *c->dst.ptr = (u32)c->dst.val;
- break; /* 64b: zero-ext */
- case 8:
- *c->dst.ptr = c->dst.val;
- break;
- }
+ write_register_operand(&c->dst);
break;
case OP_MEM:
if (c->lock_prefix)
rc = ops->cmpxchg_emulated(
- (unsigned long)c->dst.ptr,
+ c->dst.addr.mem,
&c->dst.orig_val,
&c->dst.val,
c->dst.bytes,
@@ -1611,14 +1109,13 @@
ctxt->vcpu);
else
rc = ops->write_emulated(
- (unsigned long)c->dst.ptr,
+ c->dst.addr.mem,
&c->dst.val,
c->dst.bytes,
&err,
ctxt->vcpu);
if (rc == X86EMUL_PROPAGATE_FAULT)
- emulate_pf(ctxt,
- (unsigned long)c->dst.ptr, err);
+ emulate_pf(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
break;
@@ -1640,8 +1137,8 @@
c->dst.bytes = c->op_bytes;
c->dst.val = c->src.val;
register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
- c->dst.ptr = (void *) register_address(c, ss_base(ctxt, ops),
- c->regs[VCPU_REGS_RSP]);
+ c->dst.addr.mem = register_address(c, ss_base(ctxt, ops),
+ c->regs[VCPU_REGS_RSP]);
}
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
@@ -1701,6 +1198,9 @@
*(unsigned long *)dest =
(ctxt->eflags & ~change_mask) | (val & change_mask);
+ if (rc == X86EMUL_PROPAGATE_FAULT)
+ emulate_pf(ctxt);
+
return rc;
}
@@ -1778,6 +1278,150 @@
return rc;
}
+int emulate_int_real(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops, int irq)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc;
+ struct desc_ptr dt;
+ gva_t cs_addr;
+ gva_t eip_addr;
+ u16 cs, eip;
+ u32 err;
+
+ /* TODO: Add limit checks */
+ c->src.val = ctxt->eflags;
+ emulate_push(ctxt, ops);
+ rc = writeback(ctxt, ops);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
+
+ c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
+ emulate_push(ctxt, ops);
+ rc = writeback(ctxt, ops);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ c->src.val = c->eip;
+ emulate_push(ctxt, ops);
+ rc = writeback(ctxt, ops);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ c->dst.type = OP_NONE;
+
+ ops->get_idt(&dt, ctxt->vcpu);
+
+ eip_addr = dt.address + (irq << 2);
+ cs_addr = dt.address + (irq << 2) + 2;
+
+ rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &err);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &err);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ rc = load_segment_descriptor(ctxt, ops, cs, VCPU_SREG_CS);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ c->eip = eip;
+
+ return rc;
+}
+
+static int emulate_int(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops, int irq)
+{
+ switch(ctxt->mode) {
+ case X86EMUL_MODE_REAL:
+ return emulate_int_real(ctxt, ops, irq);
+ case X86EMUL_MODE_VM86:
+ case X86EMUL_MODE_PROT16:
+ case X86EMUL_MODE_PROT32:
+ case X86EMUL_MODE_PROT64:
+ default:
+ /* Protected mode interrupts unimplemented yet */
+ return X86EMUL_UNHANDLEABLE;
+ }
+}
+
+static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc = X86EMUL_CONTINUE;
+ unsigned long temp_eip = 0;
+ unsigned long temp_eflags = 0;
+ unsigned long cs = 0;
+ unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
+ EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
+ EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
+ unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
+
+ /* TODO: Add stack limit check */
+
+ rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes);
+
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ if (temp_eip & ~0xffff) {
+ emulate_gp(ctxt, 0);
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+
+ rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes);
+
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
+
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ c->eip = temp_eip;
+
+
+ if (c->op_bytes == 4)
+ ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
+ else if (c->op_bytes == 2) {
+ ctxt->eflags &= ~0xffff;
+ ctxt->eflags |= temp_eflags;
+ }
+
+ ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
+ ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+
+ return rc;
+}
+
+static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops* ops)
+{
+ switch(ctxt->mode) {
+ case X86EMUL_MODE_REAL:
+ return emulate_iret_real(ctxt, ops);
+ case X86EMUL_MODE_VM86:
+ case X86EMUL_MODE_PROT16:
+ case X86EMUL_MODE_PROT32:
+ case X86EMUL_MODE_PROT64:
+ default:
+ /* iret from protected mode unimplemented yet */
+ return X86EMUL_UNHANDLEABLE;
+ }
+}
+
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
@@ -1819,6 +1463,9 @@
struct x86_emulate_ops *ops)
{
struct decode_cache *c = &ctxt->decode;
+ unsigned long *rax = &c->regs[VCPU_REGS_RAX];
+ unsigned long *rdx = &c->regs[VCPU_REGS_RDX];
+ u8 de = 0;
switch (c->modrm_reg) {
case 0 ... 1: /* test */
@@ -1830,10 +1477,26 @@
case 3: /* neg */
emulate_1op("neg", c->dst, ctxt->eflags);
break;
+ case 4: /* mul */
+ emulate_1op_rax_rdx("mul", c->src, *rax, *rdx, ctxt->eflags);
+ break;
+ case 5: /* imul */
+ emulate_1op_rax_rdx("imul", c->src, *rax, *rdx, ctxt->eflags);
+ break;
+ case 6: /* div */
+ emulate_1op_rax_rdx_ex("div", c->src, *rax, *rdx,
+ ctxt->eflags, de);
+ break;
+ case 7: /* idiv */
+ emulate_1op_rax_rdx_ex("idiv", c->src, *rax, *rdx,
+ ctxt->eflags, de);
+ break;
default:
- return 0;
+ return X86EMUL_UNHANDLEABLE;
}
- return 1;
+ if (de)
+ return emulate_de(ctxt);
+ return X86EMUL_CONTINUE;
}
static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
@@ -1905,6 +1568,23 @@
return rc;
}
+static int emulate_load_segment(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops, int seg)
+{
+ struct decode_cache *c = &ctxt->decode;
+ unsigned short sel;
+ int rc;
+
+ memcpy(&sel, c->src.valptr + c->op_bytes, 2);
+
+ rc = load_segment_descriptor(ctxt, ops, sel, seg);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ c->dst.val = c->src.val;
+ return rc;
+}
+
static inline void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops, struct desc_struct *cs,
@@ -2160,9 +1840,15 @@
struct x86_emulate_ops *ops,
u16 port, u16 len)
{
+ if (ctxt->perm_ok)
+ return true;
+
if (emulator_bad_iopl(ctxt, ops))
if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
return false;
+
+ ctxt->perm_ok = true;
+
return true;
}
@@ -2254,7 +1940,7 @@
&err);
if (ret == X86EMUL_PROPAGATE_FAULT) {
/* FIXME: need to provide precise fault address */
- emulate_pf(ctxt, old_tss_base, err);
+ emulate_pf(ctxt);
return ret;
}
@@ -2264,7 +1950,7 @@
&err);
if (ret == X86EMUL_PROPAGATE_FAULT) {
/* FIXME: need to provide precise fault address */
- emulate_pf(ctxt, old_tss_base, err);
+ emulate_pf(ctxt);
return ret;
}
@@ -2272,7 +1958,7 @@
&err);
if (ret == X86EMUL_PROPAGATE_FAULT) {
/* FIXME: need to provide precise fault address */
- emulate_pf(ctxt, new_tss_base, err);
+ emulate_pf(ctxt);
return ret;
}
@@ -2285,7 +1971,7 @@
ctxt->vcpu, &err);
if (ret == X86EMUL_PROPAGATE_FAULT) {
/* FIXME: need to provide precise fault address */
- emulate_pf(ctxt, new_tss_base, err);
+ emulate_pf(ctxt);
return ret;
}
}
@@ -2396,7 +2082,7 @@
&err);
if (ret == X86EMUL_PROPAGATE_FAULT) {
/* FIXME: need to provide precise fault address */
- emulate_pf(ctxt, old_tss_base, err);
+ emulate_pf(ctxt);
return ret;
}
@@ -2406,7 +2092,7 @@
&err);
if (ret == X86EMUL_PROPAGATE_FAULT) {
/* FIXME: need to provide precise fault address */
- emulate_pf(ctxt, old_tss_base, err);
+ emulate_pf(ctxt);
return ret;
}
@@ -2414,7 +2100,7 @@
&err);
if (ret == X86EMUL_PROPAGATE_FAULT) {
/* FIXME: need to provide precise fault address */
- emulate_pf(ctxt, new_tss_base, err);
+ emulate_pf(ctxt);
return ret;
}
@@ -2427,7 +2113,7 @@
ctxt->vcpu, &err);
if (ret == X86EMUL_PROPAGATE_FAULT) {
/* FIXME: need to provide precise fault address */
- emulate_pf(ctxt, new_tss_base, err);
+ emulate_pf(ctxt);
return ret;
}
}
@@ -2523,10 +2209,10 @@
}
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops,
u16 tss_selector, int reason,
bool has_error_code, u32 error_code)
{
+ struct x86_emulate_ops *ops = ctxt->ops;
struct decode_cache *c = &ctxt->decode;
int rc;
@@ -2552,16 +2238,784 @@
int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
register_address_increment(c, &c->regs[reg], df * op->bytes);
- op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]);
+ op->addr.mem = register_address(c, base, c->regs[reg]);
+}
+
+static int em_push(struct x86_emulate_ctxt *ctxt)
+{
+ emulate_push(ctxt, ctxt->ops);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_das(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ u8 al, old_al;
+ bool af, cf, old_cf;
+
+ cf = ctxt->eflags & X86_EFLAGS_CF;
+ al = c->dst.val;
+
+ old_al = al;
+ old_cf = cf;
+ cf = false;
+ af = ctxt->eflags & X86_EFLAGS_AF;
+ if ((al & 0x0f) > 9 || af) {
+ al -= 6;
+ cf = old_cf | (al >= 250);
+ af = true;
+ } else {
+ af = false;
+ }
+ if (old_al > 0x99 || old_cf) {
+ al -= 0x60;
+ cf = true;
+ }
+
+ c->dst.val = al;
+ /* Set PF, ZF, SF */
+ c->src.type = OP_IMM;
+ c->src.val = 0;
+ c->src.bytes = 1;
+ emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
+ ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
+ if (cf)
+ ctxt->eflags |= X86_EFLAGS_CF;
+ if (af)
+ ctxt->eflags |= X86_EFLAGS_AF;
+ return X86EMUL_CONTINUE;
+}
+
+static int em_call_far(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ u16 sel, old_cs;
+ ulong old_eip;
+ int rc;
+
+ old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
+ old_eip = c->eip;
+
+ memcpy(&sel, c->src.valptr + c->op_bytes, 2);
+ if (load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS))
+ return X86EMUL_CONTINUE;
+
+ c->eip = 0;
+ memcpy(&c->eip, c->src.valptr, c->op_bytes);
+
+ c->src.val = old_cs;
+ emulate_push(ctxt, ctxt->ops);
+ rc = writeback(ctxt, ctxt->ops);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ c->src.val = old_eip;
+ emulate_push(ctxt, ctxt->ops);
+ rc = writeback(ctxt, ctxt->ops);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ c->dst.type = OP_NONE;
+
+ return X86EMUL_CONTINUE;
+}
+
+static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc;
+
+ c->dst.type = OP_REG;
+ c->dst.addr.reg = &c->eip;
+ c->dst.bytes = c->op_bytes;
+ rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_imul(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ emulate_2op_SrcV_nobyte("imul", c->src, c->dst, ctxt->eflags);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ c->dst.val = c->src2.val;
+ return em_imul(ctxt);
+}
+
+static int em_cwd(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ c->dst.type = OP_REG;
+ c->dst.bytes = c->src.bytes;
+ c->dst.addr.reg = &c->regs[VCPU_REGS_RDX];
+ c->dst.val = ~((c->src.val >> (c->src.bytes * 8 - 1)) - 1);
+
+ return X86EMUL_CONTINUE;
+}
+
+static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
+{
+ unsigned cpl = ctxt->ops->cpl(ctxt->vcpu);
+ struct decode_cache *c = &ctxt->decode;
+ u64 tsc = 0;
+
+ if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) {
+ emulate_gp(ctxt, 0);
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+ ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc);
+ c->regs[VCPU_REGS_RAX] = (u32)tsc;
+ c->regs[VCPU_REGS_RDX] = tsc >> 32;
+ return X86EMUL_CONTINUE;
+}
+
+static int em_mov(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ c->dst.val = c->src.val;
+ return X86EMUL_CONTINUE;
+}
+
+#define D(_y) { .flags = (_y) }
+#define N D(0)
+#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
+#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) }
+#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
+
+#define D2bv(_f) D((_f) | ByteOp), D(_f)
+#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
+
+#define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \
+ D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \
+ D2bv(((_f) & ~Lock) | DstAcc | SrcImm)
+
+
+static struct opcode group1[] = {
+ X7(D(Lock)), N
+};
+
+static struct opcode group1A[] = {
+ D(DstMem | SrcNone | ModRM | Mov | Stack), N, N, N, N, N, N, N,
+};
+
+static struct opcode group3[] = {
+ D(DstMem | SrcImm | ModRM), D(DstMem | SrcImm | ModRM),
+ D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
+ X4(D(SrcMem | ModRM)),
+};
+
+static struct opcode group4[] = {
+ D(ByteOp | DstMem | SrcNone | ModRM | Lock), D(ByteOp | DstMem | SrcNone | ModRM | Lock),
+ N, N, N, N, N, N,
+};
+
+static struct opcode group5[] = {
+ D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
+ D(SrcMem | ModRM | Stack),
+ I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far),
+ D(SrcMem | ModRM | Stack), D(SrcMemFAddr | ModRM | ImplicitOps),
+ D(SrcMem | ModRM | Stack), N,
+};
+
+static struct group_dual group7 = { {
+ N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv),
+ D(SrcNone | ModRM | DstMem | Mov), N,
+ D(SrcMem16 | ModRM | Mov | Priv),
+ D(SrcMem | ModRM | ByteOp | Priv | NoAccess),
+}, {
+ D(SrcNone | ModRM | Priv), N, N, D(SrcNone | ModRM | Priv),
+ D(SrcNone | ModRM | DstMem | Mov), N,
+ D(SrcMem16 | ModRM | Mov | Priv), N,
+} };
+
+static struct opcode group8[] = {
+ N, N, N, N,
+ D(DstMem | SrcImmByte | ModRM), D(DstMem | SrcImmByte | ModRM | Lock),
+ D(DstMem | SrcImmByte | ModRM | Lock), D(DstMem | SrcImmByte | ModRM | Lock),
+};
+
+static struct group_dual group9 = { {
+ N, D(DstMem64 | ModRM | Lock), N, N, N, N, N, N,
+}, {
+ N, N, N, N, N, N, N, N,
+} };
+
+static struct opcode group11[] = {
+ I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
+};
+
+static struct opcode opcode_table[256] = {
+ /* 0x00 - 0x07 */
+ D6ALU(Lock),
+ D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
+ /* 0x08 - 0x0F */
+ D6ALU(Lock),
+ D(ImplicitOps | Stack | No64), N,
+ /* 0x10 - 0x17 */
+ D6ALU(Lock),
+ D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
+ /* 0x18 - 0x1F */
+ D6ALU(Lock),
+ D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
+ /* 0x20 - 0x27 */
+ D6ALU(Lock), N, N,
+ /* 0x28 - 0x2F */
+ D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das),
+ /* 0x30 - 0x37 */
+ D6ALU(Lock), N, N,
+ /* 0x38 - 0x3F */
+ D6ALU(0), N, N,
+ /* 0x40 - 0x4F */
+ X16(D(DstReg)),
+ /* 0x50 - 0x57 */
+ X8(I(SrcReg | Stack, em_push)),
+ /* 0x58 - 0x5F */
+ X8(D(DstReg | Stack)),
+ /* 0x60 - 0x67 */
+ D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
+ N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
+ N, N, N, N,
+ /* 0x68 - 0x6F */
+ I(SrcImm | Mov | Stack, em_push),
+ I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
+ I(SrcImmByte | Mov | Stack, em_push),
+ I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
+ D2bv(DstDI | Mov | String), /* insb, insw/insd */
+ D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */
+ /* 0x70 - 0x7F */
+ X16(D(SrcImmByte)),
+ /* 0x80 - 0x87 */
+ G(ByteOp | DstMem | SrcImm | ModRM | Group, group1),
+ G(DstMem | SrcImm | ModRM | Group, group1),
+ G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1),
+ G(DstMem | SrcImmByte | ModRM | Group, group1),
+ D2bv(DstMem | SrcReg | ModRM), D2bv(DstMem | SrcReg | ModRM | Lock),
+ /* 0x88 - 0x8F */
+ I2bv(DstMem | SrcReg | ModRM | Mov, em_mov),
+ I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
+ D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg),
+ D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A),
+ /* 0x90 - 0x97 */
+ X8(D(SrcAcc | DstReg)),
+ /* 0x98 - 0x9F */
+ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
+ I(SrcImmFAddr | No64, em_call_far), N,
+ D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N,
+ /* 0xA0 - 0xA7 */
+ I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
+ I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov),
+ I2bv(SrcSI | DstDI | Mov | String, em_mov),
+ D2bv(SrcSI | DstDI | String),
+ /* 0xA8 - 0xAF */
+ D2bv(DstAcc | SrcImm),
+ I2bv(SrcAcc | DstDI | Mov | String, em_mov),
+ I2bv(SrcSI | DstAcc | Mov | String, em_mov),
+ D2bv(SrcAcc | DstDI | String),
+ /* 0xB0 - 0xB7 */
+ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
+ /* 0xB8 - 0xBF */
+ X8(I(DstReg | SrcImm | Mov, em_mov)),
+ /* 0xC0 - 0xC7 */
+ D2bv(DstMem | SrcImmByte | ModRM),
+ I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
+ D(ImplicitOps | Stack),
+ D(DstReg | SrcMemFAddr | ModRM | No64), D(DstReg | SrcMemFAddr | ModRM | No64),
+ G(ByteOp, group11), G(0, group11),
+ /* 0xC8 - 0xCF */
+ N, N, N, D(ImplicitOps | Stack),
+ D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps),
+ /* 0xD0 - 0xD7 */
+ D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM),
+ N, N, N, N,
+ /* 0xD8 - 0xDF */
+ N, N, N, N, N, N, N, N,
+ /* 0xE0 - 0xE7 */
+ X4(D(SrcImmByte)),
+ D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte),
+ /* 0xE8 - 0xEF */
+ D(SrcImm | Stack), D(SrcImm | ImplicitOps),
+ D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps),
+ D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps),
+ /* 0xF0 - 0xF7 */
+ N, N, N, N,
+ D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3),
+ /* 0xF8 - 0xFF */
+ D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps),
+ D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
+};
+
+static struct opcode twobyte_table[256] = {
+ /* 0x00 - 0x0F */
+ N, GD(0, &group7), N, N,
+ N, D(ImplicitOps), D(ImplicitOps | Priv), N,
+ D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N,
+ N, D(ImplicitOps | ModRM), N, N,
+ /* 0x10 - 0x1F */
+ N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N,
+ /* 0x20 - 0x2F */
+ D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264),
+ D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264),
+ N, N, N, N,
+ N, N, N, N, N, N, N, N,
+ /* 0x30 - 0x3F */
+ D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc),
+ D(ImplicitOps | Priv), N,
+ D(ImplicitOps), D(ImplicitOps | Priv), N, N,
+ N, N, N, N, N, N, N, N,
+ /* 0x40 - 0x4F */
+ X16(D(DstReg | SrcMem | ModRM | Mov)),
+ /* 0x50 - 0x5F */
+ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ /* 0x60 - 0x6F */
+ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ /* 0x70 - 0x7F */
+ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ /* 0x80 - 0x8F */
+ X16(D(SrcImm)),
+ /* 0x90 - 0x9F */
+ X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
+ /* 0xA0 - 0xA7 */
+ D(ImplicitOps | Stack), D(ImplicitOps | Stack),
+ N, D(DstMem | SrcReg | ModRM | BitOp),
+ D(DstMem | SrcReg | Src2ImmByte | ModRM),
+ D(DstMem | SrcReg | Src2CL | ModRM), N, N,
+ /* 0xA8 - 0xAF */
+ D(ImplicitOps | Stack), D(ImplicitOps | Stack),
+ N, D(DstMem | SrcReg | ModRM | BitOp | Lock),
+ D(DstMem | SrcReg | Src2ImmByte | ModRM),
+ D(DstMem | SrcReg | Src2CL | ModRM),
+ D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
+ /* 0xB0 - 0xB7 */
+ D2bv(DstMem | SrcReg | ModRM | Lock),
+ D(DstReg | SrcMemFAddr | ModRM), D(DstMem | SrcReg | ModRM | BitOp | Lock),
+ D(DstReg | SrcMemFAddr | ModRM), D(DstReg | SrcMemFAddr | ModRM),
+ D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
+ /* 0xB8 - 0xBF */
+ N, N,
+ G(BitOp, group8), D(DstMem | SrcReg | ModRM | BitOp | Lock),
+ D(DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
+ D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
+ /* 0xC0 - 0xCF */
+ D2bv(DstMem | SrcReg | ModRM | Lock),
+ N, D(DstMem | SrcReg | ModRM | Mov),
+ N, N, N, GD(0, &group9),
+ N, N, N, N, N, N, N, N,
+ /* 0xD0 - 0xDF */
+ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ /* 0xE0 - 0xEF */
+ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ /* 0xF0 - 0xFF */
+ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
+};
+
+#undef D
+#undef N
+#undef G
+#undef GD
+#undef I
+
+#undef D2bv
+#undef I2bv
+#undef D6ALU
+
+static unsigned imm_size(struct decode_cache *c)
+{
+ unsigned size;
+
+ size = (c->d & ByteOp) ? 1 : c->op_bytes;
+ if (size == 8)
+ size = 4;
+ return size;
+}
+
+static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
+ unsigned size, bool sign_extension)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct x86_emulate_ops *ops = ctxt->ops;
+ int rc = X86EMUL_CONTINUE;
+
+ op->type = OP_IMM;
+ op->bytes = size;
+ op->addr.mem = c->eip;
+ /* NB. Immediates are sign-extended as necessary. */
+ switch (op->bytes) {
+ case 1:
+ op->val = insn_fetch(s8, 1, c->eip);
+ break;
+ case 2:
+ op->val = insn_fetch(s16, 2, c->eip);
+ break;
+ case 4:
+ op->val = insn_fetch(s32, 4, c->eip);
+ break;
+ }
+ if (!sign_extension) {
+ switch (op->bytes) {
+ case 1:
+ op->val &= 0xff;
+ break;
+ case 2:
+ op->val &= 0xffff;
+ break;
+ case 4:
+ op->val &= 0xffffffff;
+ break;
+ }
+ }
+done:
+ return rc;
}
int
-x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
+x86_decode_insn(struct x86_emulate_ctxt *ctxt)
{
+ struct x86_emulate_ops *ops = ctxt->ops;
+ struct decode_cache *c = &ctxt->decode;
+ int rc = X86EMUL_CONTINUE;
+ int mode = ctxt->mode;
+ int def_op_bytes, def_ad_bytes, dual, goffset;
+ struct opcode opcode, *g_mod012, *g_mod3;
+ struct operand memop = { .type = OP_NONE };
+
+ c->eip = ctxt->eip;
+ c->fetch.start = c->fetch.end = c->eip;
+ ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);
+
+ switch (mode) {
+ case X86EMUL_MODE_REAL:
+ case X86EMUL_MODE_VM86:
+ case X86EMUL_MODE_PROT16:
+ def_op_bytes = def_ad_bytes = 2;
+ break;
+ case X86EMUL_MODE_PROT32:
+ def_op_bytes = def_ad_bytes = 4;
+ break;
+#ifdef CONFIG_X86_64
+ case X86EMUL_MODE_PROT64:
+ def_op_bytes = 4;
+ def_ad_bytes = 8;
+ break;
+#endif
+ default:
+ return -1;
+ }
+
+ c->op_bytes = def_op_bytes;
+ c->ad_bytes = def_ad_bytes;
+
+ /* Legacy prefixes. */
+ for (;;) {
+ switch (c->b = insn_fetch(u8, 1, c->eip)) {
+ case 0x66: /* operand-size override */
+ /* switch between 2/4 bytes */
+ c->op_bytes = def_op_bytes ^ 6;
+ break;
+ case 0x67: /* address-size override */
+ if (mode == X86EMUL_MODE_PROT64)
+ /* switch between 4/8 bytes */
+ c->ad_bytes = def_ad_bytes ^ 12;
+ else
+ /* switch between 2/4 bytes */
+ c->ad_bytes = def_ad_bytes ^ 6;
+ break;
+ case 0x26: /* ES override */
+ case 0x2e: /* CS override */
+ case 0x36: /* SS override */
+ case 0x3e: /* DS override */
+ set_seg_override(c, (c->b >> 3) & 3);
+ break;
+ case 0x64: /* FS override */
+ case 0x65: /* GS override */
+ set_seg_override(c, c->b & 7);
+ break;
+ case 0x40 ... 0x4f: /* REX */
+ if (mode != X86EMUL_MODE_PROT64)
+ goto done_prefixes;
+ c->rex_prefix = c->b;
+ continue;
+ case 0xf0: /* LOCK */
+ c->lock_prefix = 1;
+ break;
+ case 0xf2: /* REPNE/REPNZ */
+ c->rep_prefix = REPNE_PREFIX;
+ break;
+ case 0xf3: /* REP/REPE/REPZ */
+ c->rep_prefix = REPE_PREFIX;
+ break;
+ default:
+ goto done_prefixes;
+ }
+
+ /* Any legacy prefix after a REX prefix nullifies its effect. */
+
+ c->rex_prefix = 0;
+ }
+
+done_prefixes:
+
+ /* REX prefix. */
+ if (c->rex_prefix & 8)
+ c->op_bytes = 8; /* REX.W */
+
+ /* Opcode byte(s). */
+ opcode = opcode_table[c->b];
+ /* Two-byte opcode? */
+ if (c->b == 0x0f) {
+ c->twobyte = 1;
+ c->b = insn_fetch(u8, 1, c->eip);
+ opcode = twobyte_table[c->b];
+ }
+ c->d = opcode.flags;
+
+ if (c->d & Group) {
+ dual = c->d & GroupDual;
+ c->modrm = insn_fetch(u8, 1, c->eip);
+ --c->eip;
+
+ if (c->d & GroupDual) {
+ g_mod012 = opcode.u.gdual->mod012;
+ g_mod3 = opcode.u.gdual->mod3;
+ } else
+ g_mod012 = g_mod3 = opcode.u.group;
+
+ c->d &= ~(Group | GroupDual);
+
+ goffset = (c->modrm >> 3) & 7;
+
+ if ((c->modrm >> 6) == 3)
+ opcode = g_mod3[goffset];
+ else
+ opcode = g_mod012[goffset];
+ c->d |= opcode.flags;
+ }
+
+ c->execute = opcode.u.execute;
+
+ /* Unrecognised? */
+ if (c->d == 0 || (c->d & Undefined)) {
+ DPRINTF("Cannot emulate %02x\n", c->b);
+ return -1;
+ }
+
+ if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
+ c->op_bytes = 8;
+
+ if (c->d & Op3264) {
+ if (mode == X86EMUL_MODE_PROT64)
+ c->op_bytes = 8;
+ else
+ c->op_bytes = 4;
+ }
+
+ /* ModRM and SIB bytes. */
+ if (c->d & ModRM) {
+ rc = decode_modrm(ctxt, ops, &memop);
+ if (!c->has_seg_override)
+ set_seg_override(c, c->modrm_seg);
+ } else if (c->d & MemAbs)
+ rc = decode_abs(ctxt, ops, &memop);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+
+ if (!c->has_seg_override)
+ set_seg_override(c, VCPU_SREG_DS);
+
+ if (memop.type == OP_MEM && !(!c->twobyte && c->b == 0x8d))
+ memop.addr.mem += seg_override_base(ctxt, ops, c);
+
+ if (memop.type == OP_MEM && c->ad_bytes != 8)
+ memop.addr.mem = (u32)memop.addr.mem;
+
+ if (memop.type == OP_MEM && c->rip_relative)
+ memop.addr.mem += c->eip;
+
+ /*
+ * Decode and fetch the source operand: register, memory
+ * or immediate.
+ */
+ switch (c->d & SrcMask) {
+ case SrcNone:
+ break;
+ case SrcReg:
+ decode_register_operand(&c->src, c, 0);
+ break;
+ case SrcMem16:
+ memop.bytes = 2;
+ goto srcmem_common;
+ case SrcMem32:
+ memop.bytes = 4;
+ goto srcmem_common;
+ case SrcMem:
+ memop.bytes = (c->d & ByteOp) ? 1 :
+ c->op_bytes;
+ srcmem_common:
+ c->src = memop;
+ break;
+ case SrcImmU16:
+ rc = decode_imm(ctxt, &c->src, 2, false);
+ break;
+ case SrcImm:
+ rc = decode_imm(ctxt, &c->src, imm_size(c), true);
+ break;
+ case SrcImmU:
+ rc = decode_imm(ctxt, &c->src, imm_size(c), false);
+ break;
+ case SrcImmByte:
+ rc = decode_imm(ctxt, &c->src, 1, true);
+ break;
+ case SrcImmUByte:
+ rc = decode_imm(ctxt, &c->src, 1, false);
+ break;
+ case SrcAcc:
+ c->src.type = OP_REG;
+ c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+ c->src.addr.reg = &c->regs[VCPU_REGS_RAX];
+ fetch_register_operand(&c->src);
+ break;
+ case SrcOne:
+ c->src.bytes = 1;
+ c->src.val = 1;
+ break;
+ case SrcSI:
+ c->src.type = OP_MEM;
+ c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+ c->src.addr.mem =
+ register_address(c, seg_override_base(ctxt, ops, c),
+ c->regs[VCPU_REGS_RSI]);
+ c->src.val = 0;
+ break;
+ case SrcImmFAddr:
+ c->src.type = OP_IMM;
+ c->src.addr.mem = c->eip;
+ c->src.bytes = c->op_bytes + 2;
+ insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip);
+ break;
+ case SrcMemFAddr:
+ memop.bytes = c->op_bytes + 2;
+ goto srcmem_common;
+ break;
+ }
+
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+
+ /*
+ * Decode and fetch the second source operand: register, memory
+ * or immediate.
+ */
+ switch (c->d & Src2Mask) {
+ case Src2None:
+ break;
+ case Src2CL:
+ c->src2.bytes = 1;
+ c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
+ break;
+ case Src2ImmByte:
+ rc = decode_imm(ctxt, &c->src2, 1, true);
+ break;
+ case Src2One:
+ c->src2.bytes = 1;
+ c->src2.val = 1;
+ break;
+ case Src2Imm:
+ rc = decode_imm(ctxt, &c->src2, imm_size(c), true);
+ break;
+ }
+
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+
+ /* Decode and fetch the destination operand: register or memory. */
+ switch (c->d & DstMask) {
+ case DstReg:
+ decode_register_operand(&c->dst, c,
+ c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
+ break;
+ case DstImmUByte:
+ c->dst.type = OP_IMM;
+ c->dst.addr.mem = c->eip;
+ c->dst.bytes = 1;
+ c->dst.val = insn_fetch(u8, 1, c->eip);
+ break;
+ case DstMem:
+ case DstMem64:
+ c->dst = memop;
+ if ((c->d & DstMask) == DstMem64)
+ c->dst.bytes = 8;
+ else
+ c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+ if (c->d & BitOp)
+ fetch_bit_operand(c);
+ c->dst.orig_val = c->dst.val;
+ break;
+ case DstAcc:
+ c->dst.type = OP_REG;
+ c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+ c->dst.addr.reg = &c->regs[VCPU_REGS_RAX];
+ fetch_register_operand(&c->dst);
+ c->dst.orig_val = c->dst.val;
+ break;
+ case DstDI:
+ c->dst.type = OP_MEM;
+ c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+ c->dst.addr.mem =
+ register_address(c, es_base(ctxt, ops),
+ c->regs[VCPU_REGS_RDI]);
+ c->dst.val = 0;
+ break;
+ case ImplicitOps:
+ /* Special instructions do their own operand decoding. */
+ default:
+ c->dst.type = OP_NONE; /* Disable writeback. */
+ return 0;
+ }
+
+done:
+ return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
+}
+
+static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+
+ /* The second termination condition only applies for REPE
+ * and REPNE. Test if the repeat string operation prefix is
+ * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
+ * corresponding termination condition according to:
+ * - if REPE/REPZ and ZF = 0 then done
+ * - if REPNE/REPNZ and ZF = 1 then done
+ */
+ if (((c->b == 0xa6) || (c->b == 0xa7) ||
+ (c->b == 0xae) || (c->b == 0xaf))
+ && (((c->rep_prefix == REPE_PREFIX) &&
+ ((ctxt->eflags & EFLG_ZF) == 0))
+ || ((c->rep_prefix == REPNE_PREFIX) &&
+ ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
+ return true;
+
+ return false;
+}
+
+int
+x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
+{
+ struct x86_emulate_ops *ops = ctxt->ops;
u64 msr_data;
struct decode_cache *c = &ctxt->decode;
int rc = X86EMUL_CONTINUE;
int saved_dst_type = c->dst.type;
+ int irq; /* Used for int 3, int, and into */
ctxt->decode.mem_read.pos = 0;
@@ -2576,6 +3030,11 @@
goto done;
}
+ if ((c->d & SrcMask) == SrcMemFAddr && c->src.type != OP_MEM) {
+ emulate_ud(ctxt);
+ goto done;
+ }
+
/* Privileged instruction can be executed only in CPL=0 */
if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
emulate_gp(ctxt, 0);
@@ -2583,35 +3042,15 @@
}
if (c->rep_prefix && (c->d & String)) {
- ctxt->restart = true;
/* All REP prefixes have the same first termination condition */
if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
- string_done:
- ctxt->restart = false;
ctxt->eip = c->eip;
goto done;
}
- /* The second termination condition only applies for REPE
- * and REPNE. Test if the repeat string operation prefix is
- * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
- * corresponding termination condition according to:
- * - if REPE/REPZ and ZF = 0 then done
- * - if REPNE/REPNZ and ZF = 1 then done
- */
- if ((c->b == 0xa6) || (c->b == 0xa7) ||
- (c->b == 0xae) || (c->b == 0xaf)) {
- if ((c->rep_prefix == REPE_PREFIX) &&
- ((ctxt->eflags & EFLG_ZF) == 0))
- goto string_done;
- if ((c->rep_prefix == REPNE_PREFIX) &&
- ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))
- goto string_done;
- }
- c->eip = ctxt->eip;
}
- if (c->src.type == OP_MEM) {
- rc = read_emulated(ctxt, ops, (unsigned long)c->src.ptr,
+ if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) {
+ rc = read_emulated(ctxt, ops, c->src.addr.mem,
c->src.valptr, c->src.bytes);
if (rc != X86EMUL_CONTINUE)
goto done;
@@ -2619,7 +3058,7 @@
}
if (c->src2.type == OP_MEM) {
- rc = read_emulated(ctxt, ops, (unsigned long)c->src2.ptr,
+ rc = read_emulated(ctxt, ops, c->src2.addr.mem,
&c->src2.val, c->src2.bytes);
if (rc != X86EMUL_CONTINUE)
goto done;
@@ -2631,7 +3070,7 @@
if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
/* optimisation - avoid slow emulated read if Mov */
- rc = read_emulated(ctxt, ops, (unsigned long)c->dst.ptr,
+ rc = read_emulated(ctxt, ops, c->dst.addr.mem,
&c->dst.val, c->dst.bytes);
if (rc != X86EMUL_CONTINUE)
goto done;
@@ -2640,6 +3079,13 @@
special_insn:
+ if (c->execute) {
+ rc = c->execute(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ goto writeback;
+ }
+
if (c->twobyte)
goto twobyte_insn;
@@ -2653,8 +3099,6 @@
break;
case 0x07: /* pop es */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
case 0x08 ... 0x0d:
or: /* or */
@@ -2672,8 +3116,6 @@
break;
case 0x17: /* pop ss */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
case 0x18 ... 0x1d:
sbb: /* sbb */
@@ -2684,8 +3126,6 @@
break;
case 0x1f: /* pop ds */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
case 0x20 ... 0x25:
and: /* and */
@@ -2709,58 +3149,29 @@
case 0x48 ... 0x4f: /* dec r16/r32 */
emulate_1op("dec", c->dst, ctxt->eflags);
break;
- case 0x50 ... 0x57: /* push reg */
- emulate_push(ctxt, ops);
- break;
case 0x58 ... 0x5f: /* pop reg */
pop_instruction:
rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
case 0x60: /* pusha */
rc = emulate_pusha(ctxt, ops);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
case 0x61: /* popa */
rc = emulate_popa(ctxt, ops);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
goto cannot_emulate;
c->dst.val = (s32) c->src.val;
break;
- case 0x68: /* push imm */
- case 0x6a: /* push imm8 */
- emulate_push(ctxt, ops);
- break;
case 0x6c: /* insb */
case 0x6d: /* insw/insd */
- c->dst.bytes = min(c->dst.bytes, 4u);
- if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
- c->dst.bytes)) {
- emulate_gp(ctxt, 0);
- goto done;
- }
- if (!pio_in_emulated(ctxt, ops, c->dst.bytes,
- c->regs[VCPU_REGS_RDX], &c->dst.val))
- goto done; /* IO is needed, skip writeback */
- break;
+ c->src.val = c->regs[VCPU_REGS_RDX];
+ goto do_io_in;
case 0x6e: /* outsb */
case 0x6f: /* outsw/outsd */
- c->src.bytes = min(c->src.bytes, 4u);
- if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
- c->src.bytes)) {
- emulate_gp(ctxt, 0);
- goto done;
- }
- ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX],
- &c->src.val, 1, ctxt->vcpu);
-
- c->dst.type = OP_NONE; /* nothing to writeback */
+ c->dst.val = c->regs[VCPU_REGS_RDX];
+ goto do_io_out;
break;
case 0x70 ... 0x7f: /* jcc (short) */
if (test_cc(c->b, ctxt->eflags))
@@ -2793,29 +3204,15 @@
case 0x86 ... 0x87: /* xchg */
xchg:
/* Write back the register source. */
- switch (c->dst.bytes) {
- case 1:
- *(u8 *) c->src.ptr = (u8) c->dst.val;
- break;
- case 2:
- *(u16 *) c->src.ptr = (u16) c->dst.val;
- break;
- case 4:
- *c->src.ptr = (u32) c->dst.val;
- break; /* 64b reg: zero-extend */
- case 8:
- *c->src.ptr = c->dst.val;
- break;
- }
+ c->src.val = c->dst.val;
+ write_register_operand(&c->src);
/*
* Write back the memory destination with implicit LOCK
* prefix.
*/
- c->dst.val = c->src.val;
+ c->dst.val = c->src.orig_val;
c->lock_prefix = 1;
break;
- case 0x88 ... 0x8b: /* mov */
- goto mov;
case 0x8c: /* mov r/m, sreg */
if (c->modrm_reg > VCPU_SREG_GS) {
emulate_ud(ctxt);
@@ -2824,7 +3221,7 @@
c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu);
break;
case 0x8d: /* lea r16/r32, m */
- c->dst.val = c->modrm_ea;
+ c->dst.val = c->src.addr.mem;
break;
case 0x8e: { /* mov seg, r/m16 */
uint16_t sel;
@@ -2847,76 +3244,87 @@
}
case 0x8f: /* pop (sole member of Grp1a) */
rc = emulate_grp1a(ctxt, ops);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
- case 0x90: /* nop / xchg r8,rax */
- if (c->dst.ptr == (unsigned long *)&c->regs[VCPU_REGS_RAX]) {
- c->dst.type = OP_NONE; /* nop */
+ case 0x90 ... 0x97: /* nop / xchg reg, rax */
+ if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX])
break;
- }
- case 0x91 ... 0x97: /* xchg reg,rax */
- c->src.type = OP_REG;
- c->src.bytes = c->op_bytes;
- c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
- c->src.val = *(c->src.ptr);
goto xchg;
+ case 0x98: /* cbw/cwde/cdqe */
+ switch (c->op_bytes) {
+ case 2: c->dst.val = (s8)c->dst.val; break;
+ case 4: c->dst.val = (s16)c->dst.val; break;
+ case 8: c->dst.val = (s32)c->dst.val; break;
+ }
+ break;
case 0x9c: /* pushf */
c->src.val = (unsigned long) ctxt->eflags;
emulate_push(ctxt, ops);
break;
case 0x9d: /* popf */
c->dst.type = OP_REG;
- c->dst.ptr = (unsigned long *) &ctxt->eflags;
+ c->dst.addr.reg = &ctxt->eflags;
c->dst.bytes = c->op_bytes;
rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
- case 0xa0 ... 0xa3: /* mov */
- case 0xa4 ... 0xa5: /* movs */
- goto mov;
case 0xa6 ... 0xa7: /* cmps */
c->dst.type = OP_NONE; /* Disable writeback. */
- DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
+ DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.addr.mem, c->dst.addr.mem);
goto cmp;
case 0xa8 ... 0xa9: /* test ax, imm */
goto test;
- case 0xaa ... 0xab: /* stos */
- c->dst.val = c->regs[VCPU_REGS_RAX];
- break;
- case 0xac ... 0xad: /* lods */
- goto mov;
case 0xae ... 0xaf: /* scas */
- DPRINTF("Urk! I don't handle SCAS.\n");
- goto cannot_emulate;
- case 0xb0 ... 0xbf: /* mov r, imm */
- goto mov;
+ goto cmp;
case 0xc0 ... 0xc1:
emulate_grp2(ctxt);
break;
case 0xc3: /* ret */
c->dst.type = OP_REG;
- c->dst.ptr = &c->eip;
+ c->dst.addr.reg = &c->eip;
c->dst.bytes = c->op_bytes;
goto pop_instruction;
- case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
- mov:
- c->dst.val = c->src.val;
+ case 0xc4: /* les */
+ rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES);
+ break;
+ case 0xc5: /* lds */
+ rc = emulate_load_segment(ctxt, ops, VCPU_SREG_DS);
break;
case 0xcb: /* ret far */
rc = emulate_ret_far(ctxt, ops);
- if (rc != X86EMUL_CONTINUE)
- goto done;
+ break;
+ case 0xcc: /* int3 */
+ irq = 3;
+ goto do_interrupt;
+ case 0xcd: /* int n */
+ irq = c->src.val;
+ do_interrupt:
+ rc = emulate_int(ctxt, ops, irq);
+ break;
+ case 0xce: /* into */
+ if (ctxt->eflags & EFLG_OF) {
+ irq = 4;
+ goto do_interrupt;
+ }
+ break;
+ case 0xcf: /* iret */
+ rc = emulate_iret(ctxt, ops);
break;
case 0xd0 ... 0xd1: /* Grp2 */
- c->src.val = 1;
emulate_grp2(ctxt);
break;
case 0xd2 ... 0xd3: /* Grp2 */
c->src.val = c->regs[VCPU_REGS_RCX];
emulate_grp2(ctxt);
break;
+ case 0xe0 ... 0xe2: /* loop/loopz/loopnz */
+ register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
+ if (address_mask(c, c->regs[VCPU_REGS_RCX]) != 0 &&
+ (c->b == 0xe2 || test_cc(c->b ^ 0x5, ctxt->eflags)))
+ jmp_rel(c, c->src.val);
+ break;
+ case 0xe3: /* jcxz/jecxz/jrcxz */
+ if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0)
+ jmp_rel(c, c->src.val);
+ break;
case 0xe4: /* inb */
case 0xe5: /* in */
goto do_io_in;
@@ -2964,15 +3372,16 @@
break;
case 0xee: /* out dx,al */
case 0xef: /* out dx,(e/r)ax */
- c->src.val = c->regs[VCPU_REGS_RDX];
+ c->dst.val = c->regs[VCPU_REGS_RDX];
do_io_out:
- c->dst.bytes = min(c->dst.bytes, 4u);
- if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
+ c->src.bytes = min(c->src.bytes, 4u);
+ if (!emulator_io_permited(ctxt, ops, c->dst.val,
+ c->src.bytes)) {
emulate_gp(ctxt, 0);
goto done;
}
- ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1,
- ctxt->vcpu);
+ ops->pio_out_emulated(c->src.bytes, c->dst.val,
+ &c->src.val, 1, ctxt->vcpu);
c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xf4: /* hlt */
@@ -2981,24 +3390,22 @@
case 0xf5: /* cmc */
/* complement carry flag from eflags reg */
ctxt->eflags ^= EFLG_CF;
- c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xf6 ... 0xf7: /* Grp3 */
- if (!emulate_grp3(ctxt, ops))
- goto cannot_emulate;
+ rc = emulate_grp3(ctxt, ops);
break;
case 0xf8: /* clc */
ctxt->eflags &= ~EFLG_CF;
- c->dst.type = OP_NONE; /* Disable writeback. */
+ break;
+ case 0xf9: /* stc */
+ ctxt->eflags |= EFLG_CF;
break;
case 0xfa: /* cli */
if (emulator_bad_iopl(ctxt, ops)) {
emulate_gp(ctxt, 0);
goto done;
- } else {
+ } else
ctxt->eflags &= ~X86_EFLAGS_IF;
- c->dst.type = OP_NONE; /* Disable writeback. */
- }
break;
case 0xfb: /* sti */
if (emulator_bad_iopl(ctxt, ops)) {
@@ -3007,29 +3414,29 @@
} else {
ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
ctxt->eflags |= X86_EFLAGS_IF;
- c->dst.type = OP_NONE; /* Disable writeback. */
}
break;
case 0xfc: /* cld */
ctxt->eflags &= ~EFLG_DF;
- c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xfd: /* std */
ctxt->eflags |= EFLG_DF;
- c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xfe: /* Grp4 */
grp45:
rc = emulate_grp45(ctxt, ops);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
case 0xff: /* Grp5 */
if (c->modrm_reg == 5)
goto jump_far;
goto grp45;
+ default:
+ goto cannot_emulate;
}
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+
writeback:
rc = writeback(ctxt, ops);
if (rc != X86EMUL_CONTINUE)
@@ -3050,25 +3457,32 @@
&c->dst);
if (c->rep_prefix && (c->d & String)) {
- struct read_cache *rc = &ctxt->decode.io_read;
+ struct read_cache *r = &ctxt->decode.io_read;
register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
- /*
- * Re-enter guest when pio read ahead buffer is empty or,
- * if it is not used, after each 1024 iteration.
- */
- if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) ||
- (rc->end != 0 && rc->end == rc->pos))
- ctxt->restart = false;
+
+ if (!string_insn_completed(ctxt)) {
+ /*
+ * Re-enter guest when pio read ahead buffer is empty
+ * or, if it is not used, after each 1024 iteration.
+ */
+ if ((r->end != 0 || c->regs[VCPU_REGS_RCX] & 0x3ff) &&
+ (r->end == 0 || r->end != r->pos)) {
+ /*
+ * Reset read cache. Usually happens before
+ * decode, but since instruction is restarted
+ * we have to do it here.
+ */
+ ctxt->decode.mem_read.end = 0;
+ return EMULATION_RESTART;
+ }
+ goto done; /* skip rip writeback */
+ }
}
- /*
- * reset read cache here in case string instruction is restared
- * without decoding
- */
- ctxt->decode.mem_read.end = 0;
+
ctxt->eip = c->eip;
done:
- return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
+ return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
twobyte_insn:
switch (c->b) {
@@ -3091,7 +3505,7 @@
c->dst.type = OP_NONE;
break;
case 2: /* lgdt */
- rc = read_descriptor(ctxt, ops, c->src.ptr,
+ rc = read_descriptor(ctxt, ops, c->src.addr.mem,
&size, &address, c->op_bytes);
if (rc != X86EMUL_CONTINUE)
goto done;
@@ -3104,14 +3518,12 @@
switch (c->modrm_rm) {
case 1:
rc = kvm_fix_hypercall(ctxt->vcpu);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
default:
goto cannot_emulate;
}
} else {
- rc = read_descriptor(ctxt, ops, c->src.ptr,
+ rc = read_descriptor(ctxt, ops, c->src.addr.mem,
&size, &address,
c->op_bytes);
if (rc != X86EMUL_CONTINUE)
@@ -3126,7 +3538,7 @@
c->dst.val = ops->get_cr(0, ctxt->vcpu);
break;
case 6: /* lmsw */
- ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) |
+ ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) |
(c->src.val & 0x0f), ctxt->vcpu);
c->dst.type = OP_NONE;
break;
@@ -3134,7 +3546,7 @@
emulate_ud(ctxt);
goto done;
case 7: /* invlpg*/
- emulate_invlpg(ctxt->vcpu, c->modrm_ea);
+ emulate_invlpg(ctxt->vcpu, c->src.addr.mem);
/* Disable writeback. */
c->dst.type = OP_NONE;
break;
@@ -3144,23 +3556,16 @@
break;
case 0x05: /* syscall */
rc = emulate_syscall(ctxt, ops);
- if (rc != X86EMUL_CONTINUE)
- goto done;
- else
- goto writeback;
break;
case 0x06:
emulate_clts(ctxt->vcpu);
- c->dst.type = OP_NONE;
break;
case 0x09: /* wbinvd */
kvm_emulate_wbinvd(ctxt->vcpu);
- c->dst.type = OP_NONE;
break;
case 0x08: /* invd */
case 0x0d: /* GrpP (prefetch) */
case 0x18: /* Grp16 (prefetch/nop) */
- c->dst.type = OP_NONE;
break;
case 0x20: /* mov cr, reg */
switch (c->modrm_reg) {
@@ -3170,8 +3575,7 @@
emulate_ud(ctxt);
goto done;
}
- c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu);
- c->dst.type = OP_NONE; /* no writeback */
+ c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu);
break;
case 0x21: /* mov from dr to reg */
if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
@@ -3179,11 +3583,10 @@
emulate_ud(ctxt);
goto done;
}
- ops->get_dr(c->modrm_reg, &c->regs[c->modrm_rm], ctxt->vcpu);
- c->dst.type = OP_NONE; /* no writeback */
+ ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu);
break;
case 0x22: /* mov reg, cr */
- if (ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu)) {
+ if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) {
emulate_gp(ctxt, 0);
goto done;
}
@@ -3196,7 +3599,7 @@
goto done;
}
- if (ops->set_dr(c->modrm_reg, c->regs[c->modrm_rm] &
+ if (ops->set_dr(c->modrm_reg, c->src.val &
((ctxt->mode == X86EMUL_MODE_PROT64) ?
~0ULL : ~0U), ctxt->vcpu) < 0) {
/* #UD condition is already handled by the code above */
@@ -3215,7 +3618,6 @@
goto done;
}
rc = X86EMUL_CONTINUE;
- c->dst.type = OP_NONE;
break;
case 0x32:
/* rdmsr */
@@ -3227,21 +3629,12 @@
c->regs[VCPU_REGS_RDX] = msr_data >> 32;
}
rc = X86EMUL_CONTINUE;
- c->dst.type = OP_NONE;
break;
case 0x34: /* sysenter */
rc = emulate_sysenter(ctxt, ops);
- if (rc != X86EMUL_CONTINUE)
- goto done;
- else
- goto writeback;
break;
case 0x35: /* sysexit */
rc = emulate_sysexit(ctxt, ops);
- if (rc != X86EMUL_CONTINUE)
- goto done;
- else
- goto writeback;
break;
case 0x40 ... 0x4f: /* cmov */
c->dst.val = c->dst.orig_val = c->src.val;
@@ -3251,15 +3644,15 @@
case 0x80 ... 0x8f: /* jnz rel, etc*/
if (test_cc(c->b, ctxt->eflags))
jmp_rel(c, c->src.val);
- c->dst.type = OP_NONE;
+ break;
+ case 0x90 ... 0x9f: /* setcc r/m8 */
+ c->dst.val = test_cc(c->b, ctxt->eflags);
break;
case 0xa0: /* push fs */
emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);
break;
case 0xa1: /* pop fs */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
case 0xa3:
bt: /* bt */
@@ -3277,13 +3670,9 @@
break;
case 0xa9: /* pop gs */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
case 0xab:
bts: /* bts */
- /* only subword offset */
- c->src.val &= (c->dst.bytes << 3) - 1;
emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
break;
case 0xac: /* shrd imm8, r, r/m */
@@ -3306,15 +3695,22 @@
} else {
/* Failure: write the value we saw to EAX. */
c->dst.type = OP_REG;
- c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
+ c->dst.addr.reg = (unsigned long *)&c->regs[VCPU_REGS_RAX];
}
break;
+ case 0xb2: /* lss */
+ rc = emulate_load_segment(ctxt, ops, VCPU_SREG_SS);
+ break;
case 0xb3:
btr: /* btr */
- /* only subword offset */
- c->src.val &= (c->dst.bytes << 3) - 1;
emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
break;
+ case 0xb4: /* lfs */
+ rc = emulate_load_segment(ctxt, ops, VCPU_SREG_FS);
+ break;
+ case 0xb5: /* lgs */
+ rc = emulate_load_segment(ctxt, ops, VCPU_SREG_GS);
+ break;
case 0xb6 ... 0xb7: /* movzx */
c->dst.bytes = c->op_bytes;
c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
@@ -3334,15 +3730,43 @@
break;
case 0xbb:
btc: /* btc */
- /* only subword offset */
- c->src.val &= (c->dst.bytes << 3) - 1;
emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
break;
+ case 0xbc: { /* bsf */
+ u8 zf;
+ __asm__ ("bsf %2, %0; setz %1"
+ : "=r"(c->dst.val), "=q"(zf)
+ : "r"(c->src.val));
+ ctxt->eflags &= ~X86_EFLAGS_ZF;
+ if (zf) {
+ ctxt->eflags |= X86_EFLAGS_ZF;
+ c->dst.type = OP_NONE; /* Disable writeback. */
+ }
+ break;
+ }
+ case 0xbd: { /* bsr */
+ u8 zf;
+ __asm__ ("bsr %2, %0; setz %1"
+ : "=r"(c->dst.val), "=q"(zf)
+ : "r"(c->src.val));
+ ctxt->eflags &= ~X86_EFLAGS_ZF;
+ if (zf) {
+ ctxt->eflags |= X86_EFLAGS_ZF;
+ c->dst.type = OP_NONE; /* Disable writeback. */
+ }
+ break;
+ }
case 0xbe ... 0xbf: /* movsx */
c->dst.bytes = c->op_bytes;
c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
(s16) c->src.val;
break;
+ case 0xc0 ... 0xc1: /* xadd */
+ emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
+ /* Write back the register source. */
+ c->src.val = c->dst.orig_val;
+ write_register_operand(&c->src);
+ break;
case 0xc3: /* movnti */
c->dst.bytes = c->op_bytes;
c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
@@ -3350,10 +3774,14 @@
break;
case 0xc7: /* Grp9 (cmpxchg8b) */
rc = emulate_grp9(ctxt, ops);
- if (rc != X86EMUL_CONTINUE)
- goto done;
break;
+ default:
+ goto cannot_emulate;
}
+
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+
goto writeback;
cannot_emulate:
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index ddeb231..efad723 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -5,7 +5,7 @@
* Copyright (c) 2006 Intel Corporation
* Copyright (c) 2007 Keir Fraser, XenSource Inc
* Copyright (c) 2008 Intel Corporation
- * Copyright 2009 Red Hat, Inc. and/or its affilates.
+ * Copyright 2009 Red Hat, Inc. and/or its affiliates.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -232,15 +232,6 @@
}
}
-int pit_has_pending_timer(struct kvm_vcpu *vcpu)
-{
- struct kvm_pit *pit = vcpu->kvm->arch.vpit;
-
- if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
- return atomic_read(&pit->pit_state.pit_timer.pending);
- return 0;
-}
-
static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
{
struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 4b7b73c..f628234 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -3,7 +3,7 @@
*
* Copyright (c) 2003-2004 Fabrice Bellard
* Copyright (c) 2007 Intel Corporation
- * Copyright 2009 Red Hat, Inc. and/or its affilates.
+ * Copyright 2009 Red Hat, Inc. and/or its affiliates.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -39,7 +39,7 @@
static void pic_lock(struct kvm_pic *s)
__acquires(&s->lock)
{
- raw_spin_lock(&s->lock);
+ spin_lock(&s->lock);
}
static void pic_unlock(struct kvm_pic *s)
@@ -51,7 +51,7 @@
s->wakeup_needed = false;
- raw_spin_unlock(&s->lock);
+ spin_unlock(&s->lock);
if (wakeup) {
kvm_for_each_vcpu(i, vcpu, s->kvm) {
@@ -67,6 +67,7 @@
if (!found)
return;
+ kvm_make_request(KVM_REQ_EVENT, found);
kvm_vcpu_kick(found);
}
}
@@ -308,13 +309,17 @@
addr &= 1;
if (addr == 0) {
if (val & 0x10) {
- kvm_pic_reset(s); /* init */
- /*
- * deassert a pending interrupt
- */
- pic_irq_request(s->pics_state->kvm, 0);
- s->init_state = 1;
s->init4 = val & 1;
+ s->last_irr = 0;
+ s->imr = 0;
+ s->priority_add = 0;
+ s->special_mask = 0;
+ s->read_reg_select = 0;
+ if (!s->init4) {
+ s->special_fully_nested_mode = 0;
+ s->auto_eoi = 0;
+ }
+ s->init_state = 1;
if (val & 0x02)
printk(KERN_ERR "single mode not supported");
if (val & 0x08)
@@ -564,7 +569,7 @@
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
if (!s)
return NULL;
- raw_spin_lock_init(&s->lock);
+ spin_lock_init(&s->lock);
s->kvm = kvm;
s->pics[0].elcr_mask = 0xf8;
s->pics[1].elcr_mask = 0xde;
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 2095a04..7e06ba1 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -1,7 +1,7 @@
/*
* irq.c: API for in kernel interrupt controller
* Copyright (c) 2007, Intel Corporation.
- * Copyright 2009 Red Hat, Inc. and/or its affilates.
+ * Copyright 2009 Red Hat, Inc. and/or its affiliates.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -33,12 +33,7 @@
*/
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- int ret;
-
- ret = pit_has_pending_timer(vcpu);
- ret |= apic_has_pending_timer(vcpu);
-
- return ret;
+ return apic_has_pending_timer(vcpu);
}
EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 63c3145..ba910d1 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -60,7 +60,7 @@
};
struct kvm_pic {
- raw_spinlock_t lock;
+ spinlock_t lock;
bool wakeup_needed;
unsigned pending_acks;
struct kvm *kvm;
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 6491ac8..975bb45 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -42,7 +42,14 @@
(unsigned long *)&vcpu->arch.regs_avail))
kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
- return vcpu->arch.pdptrs[index];
+ return vcpu->arch.walk_mmu->pdptrs[index];
+}
+
+static inline u64 kvm_pdptr_read_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index)
+{
+ load_pdptrs(vcpu, mmu, mmu->get_cr3(vcpu));
+
+ return mmu->pdptrs[index];
}
static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 22b06f7..413f897 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -5,7 +5,7 @@
* Copyright (C) 2006 Qumranet, Inc.
* Copyright (C) 2007 Novell
* Copyright (C) 2007 Intel
- * Copyright 2009 Red Hat, Inc. and/or its affilates.
+ * Copyright 2009 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Dor Laor <dor.laor@qumranet.com>
@@ -259,9 +259,10 @@
static void apic_update_ppr(struct kvm_lapic *apic)
{
- u32 tpr, isrv, ppr;
+ u32 tpr, isrv, ppr, old_ppr;
int isr;
+ old_ppr = apic_get_reg(apic, APIC_PROCPRI);
tpr = apic_get_reg(apic, APIC_TASKPRI);
isr = apic_find_highest_isr(apic);
isrv = (isr != -1) ? isr : 0;
@@ -274,7 +275,10 @@
apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
apic, ppr, isr, isrv);
- apic_set_reg(apic, APIC_PROCPRI, ppr);
+ if (old_ppr != ppr) {
+ apic_set_reg(apic, APIC_PROCPRI, ppr);
+ kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
+ }
}
static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
@@ -391,6 +395,7 @@
break;
}
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
break;
@@ -416,6 +421,7 @@
"INIT on a runnable vcpu %d\n",
vcpu->vcpu_id);
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
} else {
apic_debug("Ignoring de-assert INIT to vcpu %d\n",
@@ -430,6 +436,7 @@
result = 1;
vcpu->arch.sipi_vector = vector;
vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
break;
@@ -475,6 +482,7 @@
trigger_mode = IOAPIC_EDGE_TRIG;
if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+ kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
static void apic_send_ipi(struct kvm_lapic *apic)
@@ -1151,6 +1159,7 @@
update_divide_count(apic);
start_apic_timer(apic);
apic->irr_pending = true;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
}
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 311f6da..908ea54 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -7,7 +7,7 @@
* MMU support
*
* Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
@@ -49,16 +49,26 @@
*/
bool tdp_enabled = false;
+enum {
+ AUDIT_PRE_PAGE_FAULT,
+ AUDIT_POST_PAGE_FAULT,
+ AUDIT_PRE_PTE_WRITE,
+ AUDIT_POST_PTE_WRITE,
+ AUDIT_PRE_SYNC,
+ AUDIT_POST_SYNC
+};
+
+char *audit_point_name[] = {
+ "pre page fault",
+ "post page fault",
+ "pre pte write",
+ "post pte write",
+ "pre sync",
+ "post sync"
+};
+
#undef MMU_DEBUG
-#undef AUDIT
-
-#ifdef AUDIT
-static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
-#else
-static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
-#endif
-
#ifdef MMU_DEBUG
#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
@@ -71,7 +81,7 @@
#endif
-#if defined(MMU_DEBUG) || defined(AUDIT)
+#ifdef MMU_DEBUG
static int dbg = 0;
module_param(dbg, bool, 0644);
#endif
@@ -89,6 +99,8 @@
}
#endif
+#define PTE_PREFETCH_NUM 8
+
#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52
@@ -178,6 +190,7 @@
static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
static struct kmem_cache *mmu_page_header_cache;
+static struct percpu_counter kvm_total_used_mmu_pages;
static u64 __read_mostly shadow_trap_nonpresent_pte;
static u64 __read_mostly shadow_notrap_nonpresent_pte;
@@ -299,18 +312,50 @@
#endif
}
+static bool spte_has_volatile_bits(u64 spte)
+{
+ if (!shadow_accessed_mask)
+ return false;
+
+ if (!is_shadow_present_pte(spte))
+ return false;
+
+ if ((spte & shadow_accessed_mask) &&
+ (!is_writable_pte(spte) || (spte & shadow_dirty_mask)))
+ return false;
+
+ return true;
+}
+
+static bool spte_is_bit_cleared(u64 old_spte, u64 new_spte, u64 bit_mask)
+{
+ return (old_spte & bit_mask) && !(new_spte & bit_mask);
+}
+
static void update_spte(u64 *sptep, u64 new_spte)
{
- u64 old_spte;
+ u64 mask, old_spte = *sptep;
- if (!shadow_accessed_mask || (new_spte & shadow_accessed_mask) ||
- !is_rmap_spte(*sptep))
+ WARN_ON(!is_rmap_spte(new_spte));
+
+ new_spte |= old_spte & shadow_dirty_mask;
+
+ mask = shadow_accessed_mask;
+ if (is_writable_pte(old_spte))
+ mask |= shadow_dirty_mask;
+
+ if (!spte_has_volatile_bits(old_spte) || (new_spte & mask) == mask)
__set_spte(sptep, new_spte);
- else {
+ else
old_spte = __xchg_spte(sptep, new_spte);
- if (old_spte & shadow_accessed_mask)
- mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte)));
- }
+
+ if (!shadow_accessed_mask)
+ return;
+
+ if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask))
+ kvm_set_pfn_accessed(spte_to_pfn(old_spte));
+ if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask))
+ kvm_set_pfn_dirty(spte_to_pfn(old_spte));
}
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -367,7 +412,7 @@
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
- rmap_desc_cache, 4);
+ rmap_desc_cache, 4 + PTE_PREFETCH_NUM);
if (r)
goto out;
r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
@@ -591,6 +636,7 @@
desc->sptes[0] = (u64 *)*rmapp;
desc->sptes[1] = spte;
*rmapp = (unsigned long)desc | 1;
+ ++count;
} else {
rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
@@ -603,7 +649,7 @@
desc = desc->more;
}
for (i = 0; desc->sptes[i]; ++i)
- ;
+ ++count;
desc->sptes[i] = spte;
}
return count;
@@ -645,18 +691,17 @@
gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);
if (!*rmapp) {
- printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
+ printk(KERN_ERR "rmap_remove: %p 0->BUG\n", spte);
BUG();
} else if (!(*rmapp & 1)) {
- rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
+ rmap_printk("rmap_remove: %p 1->0\n", spte);
if ((u64 *)*rmapp != spte) {
- printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
- spte, *spte);
+ printk(KERN_ERR "rmap_remove: %p 1->BUG\n", spte);
BUG();
}
*rmapp = 0;
} else {
- rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
+ rmap_printk("rmap_remove: %p many->many\n", spte);
desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
prev_desc = NULL;
while (desc) {
@@ -670,7 +715,7 @@
prev_desc = desc;
desc = desc->more;
}
- pr_err("rmap_remove: %p %llx many->many\n", spte, *spte);
+ pr_err("rmap_remove: %p many->many\n", spte);
BUG();
}
}
@@ -680,18 +725,18 @@
pfn_t pfn;
u64 old_spte = *sptep;
- if (!shadow_accessed_mask || !is_shadow_present_pte(old_spte) ||
- old_spte & shadow_accessed_mask) {
+ if (!spte_has_volatile_bits(old_spte))
__set_spte(sptep, new_spte);
- } else
+ else
old_spte = __xchg_spte(sptep, new_spte);
if (!is_rmap_spte(old_spte))
return;
+
pfn = spte_to_pfn(old_spte);
if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
- if (is_writable_pte(old_spte))
+ if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
kvm_set_pfn_dirty(pfn);
}
@@ -746,13 +791,6 @@
}
spte = rmap_next(kvm, rmapp, spte);
}
- if (write_protected) {
- pfn_t pfn;
-
- spte = rmap_next(kvm, rmapp, NULL);
- pfn = spte_to_pfn(*spte);
- kvm_set_pfn_dirty(pfn);
- }
/* check for huge page mappings */
for (i = PT_DIRECTORY_LEVEL;
@@ -947,6 +985,18 @@
}
#endif
+/*
+ * This value is the sum of all of the kvm instances's
+ * kvm->arch.n_used_mmu_pages values. We need a global,
+ * aggregate version in order to make the slab shrinker
+ * faster
+ */
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
+{
+ kvm->arch.n_used_mmu_pages += nr;
+ percpu_counter_add(&kvm_total_used_mmu_pages, nr);
+}
+
static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
ASSERT(is_empty_shadow_page(sp->spt));
@@ -956,7 +1006,7 @@
if (!sp->role.direct)
__free_page(virt_to_page(sp->gfns));
kmem_cache_free(mmu_page_header_cache, sp);
- ++kvm->arch.n_free_mmu_pages;
+ kvm_mod_used_mmu_pages(kvm, -1);
}
static unsigned kvm_page_table_hashfn(gfn_t gfn)
@@ -979,7 +1029,7 @@
bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
sp->multimapped = 0;
sp->parent_pte = parent_pte;
- --vcpu->kvm->arch.n_free_mmu_pages;
+ kvm_mod_used_mmu_pages(vcpu->kvm, +1);
return sp;
}
@@ -1403,7 +1453,8 @@
if (role.direct)
role.cr4_pae = 0;
role.access = access;
- if (!tdp_enabled && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
+ if (!vcpu->arch.mmu.direct_map
+ && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
@@ -1458,6 +1509,12 @@
iterator->addr = addr;
iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
iterator->level = vcpu->arch.mmu.shadow_root_level;
+
+ if (iterator->level == PT64_ROOT_LEVEL &&
+ vcpu->arch.mmu.root_level < PT64_ROOT_LEVEL &&
+ !vcpu->arch.mmu.direct_map)
+ --iterator->level;
+
if (iterator->level == PT32E_ROOT_LEVEL) {
iterator->shadow_addr
= vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
@@ -1665,41 +1722,31 @@
/*
* Changing the number of mmu pages allocated to the vm
- * Note: if kvm_nr_mmu_pages is too small, you will get dead lock
+ * Note: if goal_nr_mmu_pages is too small, you will get dead lock
*/
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
{
- int used_pages;
LIST_HEAD(invalid_list);
-
- used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
- used_pages = max(0, used_pages);
-
/*
* If we set the number of mmu pages to be smaller be than the
* number of actived pages , we must to free some mmu pages before we
* change the value
*/
- if (used_pages > kvm_nr_mmu_pages) {
- while (used_pages > kvm_nr_mmu_pages &&
+ if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
+ while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages &&
!list_empty(&kvm->arch.active_mmu_pages)) {
struct kvm_mmu_page *page;
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- used_pages -= kvm_mmu_prepare_zap_page(kvm, page,
- &invalid_list);
+ kvm_mmu_prepare_zap_page(kvm, page, &invalid_list);
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
}
- kvm_mmu_commit_zap_page(kvm, &invalid_list);
- kvm_nr_mmu_pages = used_pages;
- kvm->arch.n_free_mmu_pages = 0;
+ goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages;
}
- else
- kvm->arch.n_free_mmu_pages += kvm_nr_mmu_pages
- - kvm->arch.n_alloc_mmu_pages;
- kvm->arch.n_alloc_mmu_pages = kvm_nr_mmu_pages;
+ kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
}
static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
@@ -1709,11 +1756,11 @@
LIST_HEAD(invalid_list);
int r;
- pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
+ pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
r = 0;
for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
- pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
+ pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
@@ -1729,7 +1776,7 @@
LIST_HEAD(invalid_list);
for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
- pgprintk("%s: zap %lx %x\n",
+ pgprintk("%s: zap %llx %x\n",
__func__, gfn, sp->role.word);
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
@@ -1925,7 +1972,7 @@
* whether the guest actually used the pte (in order to detect
* demand paging).
*/
- spte = shadow_base_present_pte | shadow_dirty_mask;
+ spte = shadow_base_present_pte;
if (!speculative)
spte |= shadow_accessed_mask;
if (!dirty)
@@ -1948,8 +1995,8 @@
spte |= (u64)pfn << PAGE_SHIFT;
if ((pte_access & ACC_WRITE_MASK)
- || (!tdp_enabled && write_fault && !is_write_protection(vcpu)
- && !user_fault)) {
+ || (!vcpu->arch.mmu.direct_map && write_fault
+ && !is_write_protection(vcpu) && !user_fault)) {
if (level > PT_PAGE_TABLE_LEVEL &&
has_wrprotected_page(vcpu->kvm, gfn, level)) {
@@ -1960,7 +2007,8 @@
spte |= PT_WRITABLE_MASK;
- if (!tdp_enabled && !(pte_access & ACC_WRITE_MASK))
+ if (!vcpu->arch.mmu.direct_map
+ && !(pte_access & ACC_WRITE_MASK))
spte &= ~PT_USER_MASK;
/*
@@ -1973,7 +2021,7 @@
goto set_pte;
if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
- pgprintk("%s: found shadow page for %lx, marking ro\n",
+ pgprintk("%s: found shadow page for %llx, marking ro\n",
__func__, gfn);
ret = 1;
pte_access &= ~ACC_WRITE_MASK;
@@ -1986,8 +2034,6 @@
mark_page_dirty(vcpu->kvm, gfn);
set_pte:
- if (is_writable_pte(*sptep) && !is_writable_pte(spte))
- kvm_set_pfn_dirty(pfn);
update_spte(sptep, spte);
done:
return ret;
@@ -2004,7 +2050,7 @@
int rmap_count;
pgprintk("%s: spte %llx access %x write_fault %d"
- " user_fault %d gfn %lx\n",
+ " user_fault %d gfn %llx\n",
__func__, *sptep, pt_access,
write_fault, user_fault, gfn);
@@ -2023,7 +2069,7 @@
__set_spte(sptep, shadow_trap_nonpresent_pte);
kvm_flush_remote_tlbs(vcpu->kvm);
} else if (pfn != spte_to_pfn(*sptep)) {
- pgprintk("hfn old %lx new %lx\n",
+ pgprintk("hfn old %llx new %llx\n",
spte_to_pfn(*sptep), pfn);
drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
kvm_flush_remote_tlbs(vcpu->kvm);
@@ -2040,7 +2086,7 @@
}
pgprintk("%s: setting spte %llx\n", __func__, *sptep);
- pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n",
+ pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
is_large_pte(*sptep)? "2MB" : "4kB",
*sptep & PT_PRESENT_MASK ?"RW":"R", gfn,
*sptep, sptep);
@@ -2064,6 +2110,105 @@
{
}
+static struct kvm_memory_slot *
+pte_prefetch_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log)
+{
+ struct kvm_memory_slot *slot;
+
+ slot = gfn_to_memslot(vcpu->kvm, gfn);
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
+ (no_dirty_log && slot->dirty_bitmap))
+ slot = NULL;
+
+ return slot;
+}
+
+static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
+ bool no_dirty_log)
+{
+ struct kvm_memory_slot *slot;
+ unsigned long hva;
+
+ slot = pte_prefetch_gfn_to_memslot(vcpu, gfn, no_dirty_log);
+ if (!slot) {
+ get_page(bad_page);
+ return page_to_pfn(bad_page);
+ }
+
+ hva = gfn_to_hva_memslot(slot, gfn);
+
+ return hva_to_pfn_atomic(vcpu->kvm, hva);
+}
+
+static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *sp,
+ u64 *start, u64 *end)
+{
+ struct page *pages[PTE_PREFETCH_NUM];
+ unsigned access = sp->role.access;
+ int i, ret;
+ gfn_t gfn;
+
+ gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt);
+ if (!pte_prefetch_gfn_to_memslot(vcpu, gfn, access & ACC_WRITE_MASK))
+ return -1;
+
+ ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start);
+ if (ret <= 0)
+ return -1;
+
+ for (i = 0; i < ret; i++, gfn++, start++)
+ mmu_set_spte(vcpu, start, ACC_ALL,
+ access, 0, 0, 1, NULL,
+ sp->role.level, gfn,
+ page_to_pfn(pages[i]), true, true);
+
+ return 0;
+}
+
+static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *sp, u64 *sptep)
+{
+ u64 *spte, *start = NULL;
+ int i;
+
+ WARN_ON(!sp->role.direct);
+
+ i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
+ spte = sp->spt + i;
+
+ for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
+ if (*spte != shadow_trap_nonpresent_pte || spte == sptep) {
+ if (!start)
+ continue;
+ if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0)
+ break;
+ start = NULL;
+ } else if (!start)
+ start = spte;
+ }
+}
+
+static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
+{
+ struct kvm_mmu_page *sp;
+
+ /*
+ * Since it's no accessed bit on EPT, it's no way to
+ * distinguish between actually accessed translations
+ * and prefetched, so disable pte prefetch if EPT is
+ * enabled.
+ */
+ if (!shadow_accessed_mask)
+ return;
+
+ sp = page_header(__pa(sptep));
+ if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+ return;
+
+ __direct_pte_prefetch(vcpu, sp, sptep);
+}
+
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
int level, gfn_t gfn, pfn_t pfn)
{
@@ -2077,6 +2222,7 @@
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
0, write, 1, &pt_write,
level, gfn, pfn, false, true);
+ direct_pte_prefetch(vcpu, iterator.sptep);
++vcpu->stat.pf_fixed;
break;
}
@@ -2098,28 +2244,31 @@
__set_spte(iterator.sptep,
__pa(sp->spt)
| PT_PRESENT_MASK | PT_WRITABLE_MASK
- | shadow_user_mask | shadow_x_mask);
+ | shadow_user_mask | shadow_x_mask
+ | shadow_accessed_mask);
}
}
return pt_write;
}
-static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn)
+static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
{
- char buf[1];
- void __user *hva;
- int r;
+ siginfo_t info;
- /* Touch the page, so send SIGBUS */
- hva = (void __user *)gfn_to_hva(kvm, gfn);
- r = copy_from_user(buf, hva, 1);
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_MCEERR_AR;
+ info.si_addr = (void __user *)address;
+ info.si_addr_lsb = PAGE_SHIFT;
+
+ send_sig_info(SIGBUS, &info, tsk);
}
static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
{
kvm_release_pfn_clean(pfn);
if (is_hwpoison_pfn(pfn)) {
- kvm_send_hwpoison_signal(kvm, gfn);
+ kvm_send_hwpoison_signal(gfn_to_hva(kvm, gfn), current);
return 0;
} else if (is_fault_pfn(pfn))
return -EFAULT;
@@ -2179,7 +2328,9 @@
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
spin_lock(&vcpu->kvm->mmu_lock);
- if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+ if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL &&
+ (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL ||
+ vcpu->arch.mmu.direct_map)) {
hpa_t root = vcpu->arch.mmu.root_hpa;
sp = page_header(root);
@@ -2222,80 +2373,158 @@
return ret;
}
-static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
+static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
{
- int i;
- gfn_t root_gfn;
struct kvm_mmu_page *sp;
- int direct = 0;
- u64 pdptr;
-
- root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
+ unsigned i;
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+ spin_lock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_free_some_pages(vcpu);
+ sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
+ 1, ACC_ALL, NULL);
+ ++sp->root_count;
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ vcpu->arch.mmu.root_hpa = __pa(sp->spt);
+ } else if (vcpu->arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) {
+ for (i = 0; i < 4; ++i) {
+ hpa_t root = vcpu->arch.mmu.pae_root[i];
+
+ ASSERT(!VALID_PAGE(root));
+ spin_lock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_free_some_pages(vcpu);
+ sp = kvm_mmu_get_page(vcpu, i << 30, i << 30,
+ PT32_ROOT_LEVEL, 1, ACC_ALL,
+ NULL);
+ root = __pa(sp->spt);
+ ++sp->root_count;
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+ }
+ vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+ } else
+ BUG();
+
+ return 0;
+}
+
+static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mmu_page *sp;
+ u64 pdptr, pm_mask;
+ gfn_t root_gfn;
+ int i;
+
+ root_gfn = vcpu->arch.mmu.get_cr3(vcpu) >> PAGE_SHIFT;
+
+ if (mmu_check_root(vcpu, root_gfn))
+ return 1;
+
+ /*
+ * Do we shadow a long mode page table? If so we need to
+ * write-protect the guests page table root.
+ */
+ if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
ASSERT(!VALID_PAGE(root));
- if (mmu_check_root(vcpu, root_gfn))
- return 1;
- if (tdp_enabled) {
- direct = 1;
- root_gfn = 0;
- }
+
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
- sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, direct,
- ACC_ALL, NULL);
+ sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
+ 0, ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = root;
return 0;
}
- direct = !is_paging(vcpu);
+
+ /*
+ * We shadow a 32 bit page table. This may be a legacy 2-level
+ * or a PAE 3-level page table. In either case we need to be aware that
+ * the shadow page table may be a PAE or a long mode page table.
+ */
+ pm_mask = PT_PRESENT_MASK;
+ if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL)
+ pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
+
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
ASSERT(!VALID_PAGE(root));
if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
- pdptr = kvm_pdptr_read(vcpu, i);
+ pdptr = kvm_pdptr_read_mmu(vcpu, &vcpu->arch.mmu, i);
if (!is_present_gpte(pdptr)) {
vcpu->arch.mmu.pae_root[i] = 0;
continue;
}
root_gfn = pdptr >> PAGE_SHIFT;
- } else if (vcpu->arch.mmu.root_level == 0)
- root_gfn = 0;
- if (mmu_check_root(vcpu, root_gfn))
- return 1;
- if (tdp_enabled) {
- direct = 1;
- root_gfn = i << 30;
+ if (mmu_check_root(vcpu, root_gfn))
+ return 1;
}
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
- PT32_ROOT_LEVEL, direct,
+ PT32_ROOT_LEVEL, 0,
ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+ vcpu->arch.mmu.pae_root[i] = root | pm_mask;
}
vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+
+ /*
+ * If we shadow a 32 bit page table with a long mode page
+ * table we enter this path.
+ */
+ if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+ if (vcpu->arch.mmu.lm_root == NULL) {
+ /*
+ * The additional page necessary for this is only
+ * allocated on demand.
+ */
+
+ u64 *lm_root;
+
+ lm_root = (void*)get_zeroed_page(GFP_KERNEL);
+ if (lm_root == NULL)
+ return 1;
+
+ lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask;
+
+ vcpu->arch.mmu.lm_root = lm_root;
+ }
+
+ vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.lm_root);
+ }
+
return 0;
}
+static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.mmu.direct_map)
+ return mmu_alloc_direct_roots(vcpu);
+ else
+ return mmu_alloc_shadow_roots(vcpu);
+}
+
static void mmu_sync_roots(struct kvm_vcpu *vcpu)
{
int i;
struct kvm_mmu_page *sp;
+ if (vcpu->arch.mmu.direct_map)
+ return;
+
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
- if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+
+ trace_kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
+ if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
sp = page_header(root);
mmu_sync_children(vcpu, sp);
@@ -2310,6 +2539,7 @@
mmu_sync_children(vcpu, sp);
}
}
+ trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
}
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
@@ -2327,6 +2557,14 @@
return vaddr;
}
+static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
+ u32 access, u32 *error)
+{
+ if (error)
+ *error = 0;
+ return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access);
+}
+
static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
u32 error_code)
{
@@ -2393,10 +2631,9 @@
mmu_free_roots(vcpu);
}
-static int nonpaging_init_context(struct kvm_vcpu *vcpu)
+static int nonpaging_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
- struct kvm_mmu *context = &vcpu->arch.mmu;
-
context->new_cr3 = nonpaging_new_cr3;
context->page_fault = nonpaging_page_fault;
context->gva_to_gpa = nonpaging_gva_to_gpa;
@@ -2407,6 +2644,8 @@
context->root_level = 0;
context->shadow_root_level = PT32E_ROOT_LEVEL;
context->root_hpa = INVALID_PAGE;
+ context->direct_map = true;
+ context->nx = false;
return 0;
}
@@ -2422,11 +2661,14 @@
mmu_free_roots(vcpu);
}
-static void inject_page_fault(struct kvm_vcpu *vcpu,
- u64 addr,
- u32 err_code)
+static unsigned long get_cr3(struct kvm_vcpu *vcpu)
{
- kvm_inject_page_fault(vcpu, addr, err_code);
+ return vcpu->arch.cr3;
+}
+
+static void inject_page_fault(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.mmu.inject_page_fault(vcpu);
}
static void paging_free(struct kvm_vcpu *vcpu)
@@ -2434,12 +2676,12 @@
nonpaging_free(vcpu);
}
-static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
{
int bit7;
bit7 = (gpte >> 7) & 1;
- return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
+ return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
}
#define PTTYPE 64
@@ -2450,13 +2692,14 @@
#include "paging_tmpl.h"
#undef PTTYPE
-static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
+static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context,
+ int level)
{
- struct kvm_mmu *context = &vcpu->arch.mmu;
int maxphyaddr = cpuid_maxphyaddr(vcpu);
u64 exb_bit_rsvd = 0;
- if (!is_nx(vcpu))
+ if (!context->nx)
exb_bit_rsvd = rsvd_bits(63, 63);
switch (level) {
case PT32_ROOT_LEVEL:
@@ -2511,9 +2754,13 @@
}
}
-static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
+static int paging64_init_context_common(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context,
+ int level)
{
- struct kvm_mmu *context = &vcpu->arch.mmu;
+ context->nx = is_nx(vcpu);
+
+ reset_rsvds_bits_mask(vcpu, context, level);
ASSERT(is_pae(vcpu));
context->new_cr3 = paging_new_cr3;
@@ -2526,20 +2773,23 @@
context->root_level = level;
context->shadow_root_level = level;
context->root_hpa = INVALID_PAGE;
+ context->direct_map = false;
return 0;
}
-static int paging64_init_context(struct kvm_vcpu *vcpu)
+static int paging64_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
- reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
- return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
+ return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
}
-static int paging32_init_context(struct kvm_vcpu *vcpu)
+static int paging32_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
- struct kvm_mmu *context = &vcpu->arch.mmu;
+ context->nx = false;
- reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
+ reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL);
+
context->new_cr3 = paging_new_cr3;
context->page_fault = paging32_page_fault;
context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2550,18 +2800,19 @@
context->root_level = PT32_ROOT_LEVEL;
context->shadow_root_level = PT32E_ROOT_LEVEL;
context->root_hpa = INVALID_PAGE;
+ context->direct_map = false;
return 0;
}
-static int paging32E_init_context(struct kvm_vcpu *vcpu)
+static int paging32E_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
- reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
- return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
+ return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
}
static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
{
- struct kvm_mmu *context = &vcpu->arch.mmu;
+ struct kvm_mmu *context = vcpu->arch.walk_mmu;
context->new_cr3 = nonpaging_new_cr3;
context->page_fault = tdp_page_fault;
@@ -2571,20 +2822,29 @@
context->invlpg = nonpaging_invlpg;
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
context->root_hpa = INVALID_PAGE;
+ context->direct_map = true;
+ context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
+ context->get_cr3 = get_cr3;
+ context->inject_page_fault = kvm_inject_page_fault;
+ context->nx = is_nx(vcpu);
if (!is_paging(vcpu)) {
+ context->nx = false;
context->gva_to_gpa = nonpaging_gva_to_gpa;
context->root_level = 0;
} else if (is_long_mode(vcpu)) {
- reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
+ context->nx = is_nx(vcpu);
+ reset_rsvds_bits_mask(vcpu, context, PT64_ROOT_LEVEL);
context->gva_to_gpa = paging64_gva_to_gpa;
context->root_level = PT64_ROOT_LEVEL;
} else if (is_pae(vcpu)) {
- reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
+ context->nx = is_nx(vcpu);
+ reset_rsvds_bits_mask(vcpu, context, PT32E_ROOT_LEVEL);
context->gva_to_gpa = paging64_gva_to_gpa;
context->root_level = PT32E_ROOT_LEVEL;
} else {
- reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
+ context->nx = false;
+ reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL);
context->gva_to_gpa = paging32_gva_to_gpa;
context->root_level = PT32_ROOT_LEVEL;
}
@@ -2592,33 +2852,83 @@
return 0;
}
-static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
+int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
{
int r;
-
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
if (!is_paging(vcpu))
- r = nonpaging_init_context(vcpu);
+ r = nonpaging_init_context(vcpu, context);
else if (is_long_mode(vcpu))
- r = paging64_init_context(vcpu);
+ r = paging64_init_context(vcpu, context);
else if (is_pae(vcpu))
- r = paging32E_init_context(vcpu);
+ r = paging32E_init_context(vcpu, context);
else
- r = paging32_init_context(vcpu);
+ r = paging32_init_context(vcpu, context);
vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
- vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
+ vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
return r;
}
+EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
+
+static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
+{
+ int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
+
+ vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3;
+ vcpu->arch.walk_mmu->get_cr3 = get_cr3;
+ vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+
+ return r;
+}
+
+static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
+
+ g_context->get_cr3 = get_cr3;
+ g_context->inject_page_fault = kvm_inject_page_fault;
+
+ /*
+ * Note that arch.mmu.gva_to_gpa translates l2_gva to l1_gpa. The
+ * translation of l2_gpa to l1_gpa addresses is done using the
+ * arch.nested_mmu.gva_to_gpa function. Basically the gva_to_gpa
+ * functions between mmu and nested_mmu are swapped.
+ */
+ if (!is_paging(vcpu)) {
+ g_context->nx = false;
+ g_context->root_level = 0;
+ g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
+ } else if (is_long_mode(vcpu)) {
+ g_context->nx = is_nx(vcpu);
+ reset_rsvds_bits_mask(vcpu, g_context, PT64_ROOT_LEVEL);
+ g_context->root_level = PT64_ROOT_LEVEL;
+ g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
+ } else if (is_pae(vcpu)) {
+ g_context->nx = is_nx(vcpu);
+ reset_rsvds_bits_mask(vcpu, g_context, PT32E_ROOT_LEVEL);
+ g_context->root_level = PT32E_ROOT_LEVEL;
+ g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
+ } else {
+ g_context->nx = false;
+ reset_rsvds_bits_mask(vcpu, g_context, PT32_ROOT_LEVEL);
+ g_context->root_level = PT32_ROOT_LEVEL;
+ g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
+ }
+
+ return 0;
+}
static int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
vcpu->arch.update_pte.pfn = bad_pfn;
- if (tdp_enabled)
+ if (mmu_is_nested(vcpu))
+ return init_kvm_nested_mmu(vcpu);
+ else if (tdp_enabled)
return init_kvm_tdp_mmu(vcpu);
else
return init_kvm_softmmu(vcpu);
@@ -2653,7 +2963,7 @@
if (r)
goto out;
/* set_cr3() should ensure TLB has been flushed */
- kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+ vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
out:
return r;
}
@@ -2663,6 +2973,7 @@
{
mmu_free_roots(vcpu);
}
+EXPORT_SYMBOL_GPL(kvm_mmu_unload);
static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp,
@@ -2695,7 +3006,7 @@
return;
}
- if (is_rsvd_bits_set(vcpu, *(u64 *)new, PT_PAGE_TABLE_LEVEL))
+ if (is_rsvd_bits_set(&vcpu->arch.mmu, *(u64 *)new, PT_PAGE_TABLE_LEVEL))
return;
++vcpu->kvm->stat.mmu_pte_updated;
@@ -2837,7 +3148,7 @@
kvm_mmu_access_page(vcpu, gfn);
kvm_mmu_free_some_pages(vcpu);
++vcpu->kvm->stat.mmu_pte_write;
- kvm_mmu_audit(vcpu, "pre pte write");
+ trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
if (guest_initiated) {
if (gfn == vcpu->arch.last_pt_write_gfn
&& !last_updated_pte_accessed(vcpu)) {
@@ -2910,7 +3221,7 @@
}
mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush);
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
- kvm_mmu_audit(vcpu, "post pte write");
+ trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
spin_unlock(&vcpu->kvm->mmu_lock);
if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
@@ -2923,7 +3234,7 @@
gpa_t gpa;
int r;
- if (tdp_enabled)
+ if (vcpu->arch.mmu.direct_map)
return 0;
gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
@@ -2937,21 +3248,18 @@
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
- int free_pages;
LIST_HEAD(invalid_list);
- free_pages = vcpu->kvm->arch.n_free_mmu_pages;
- while (free_pages < KVM_REFILL_PAGES &&
+ while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES &&
!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
struct kvm_mmu_page *sp;
sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- free_pages += kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
- &invalid_list);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
+ kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
++vcpu->kvm->stat.mmu_recycled;
}
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
}
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
@@ -3013,6 +3321,8 @@
static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
free_page((unsigned long)vcpu->arch.mmu.pae_root);
+ if (vcpu->arch.mmu.lm_root != NULL)
+ free_page((unsigned long)vcpu->arch.mmu.lm_root);
}
static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
@@ -3054,15 +3364,6 @@
return init_kvm_mmu(vcpu);
}
-void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
-{
- ASSERT(vcpu);
-
- destroy_kvm_mmu(vcpu);
- free_mmu_pages(vcpu);
- mmu_free_memory_caches(vcpu);
-}
-
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
{
struct kvm_mmu_page *sp;
@@ -3112,23 +3413,22 @@
{
struct kvm *kvm;
struct kvm *kvm_freed = NULL;
- int cache_count = 0;
+
+ if (nr_to_scan == 0)
+ goto out;
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
- int npages, idx, freed_pages;
+ int idx, freed_pages;
LIST_HEAD(invalid_list);
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
- npages = kvm->arch.n_alloc_mmu_pages -
- kvm->arch.n_free_mmu_pages;
- cache_count += npages;
- if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
+ if (!kvm_freed && nr_to_scan > 0 &&
+ kvm->arch.n_used_mmu_pages > 0) {
freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm,
&invalid_list);
- cache_count -= freed_pages;
kvm_freed = kvm;
}
nr_to_scan--;
@@ -3142,7 +3442,8 @@
spin_unlock(&kvm_lock);
- return cache_count;
+out:
+ return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
}
static struct shrinker mmu_shrinker = {
@@ -3163,6 +3464,7 @@
void kvm_mmu_module_exit(void)
{
mmu_destroy_caches();
+ percpu_counter_destroy(&kvm_total_used_mmu_pages);
unregister_shrinker(&mmu_shrinker);
}
@@ -3185,6 +3487,9 @@
if (!mmu_page_header_cache)
goto nomem;
+ if (percpu_counter_init(&kvm_total_used_mmu_pages, 0))
+ goto nomem;
+
register_shrinker(&mmu_shrinker);
return 0;
@@ -3355,271 +3660,18 @@
}
EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
-#ifdef AUDIT
-
-static const char *audit_msg;
-
-static gva_t canonicalize(gva_t gva)
-{
-#ifdef CONFIG_X86_64
- gva = (long long)(gva << 16) >> 16;
+#ifdef CONFIG_KVM_MMU_AUDIT
+#include "mmu_audit.c"
+#else
+static void mmu_audit_disable(void) { }
#endif
- return gva;
-}
-
-typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep);
-
-static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp,
- inspect_spte_fn fn)
+void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
- int i;
+ ASSERT(vcpu);
- for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
- u64 ent = sp->spt[i];
-
- if (is_shadow_present_pte(ent)) {
- if (!is_last_spte(ent, sp->role.level)) {
- struct kvm_mmu_page *child;
- child = page_header(ent & PT64_BASE_ADDR_MASK);
- __mmu_spte_walk(kvm, child, fn);
- } else
- fn(kvm, &sp->spt[i]);
- }
- }
+ destroy_kvm_mmu(vcpu);
+ free_mmu_pages(vcpu);
+ mmu_free_memory_caches(vcpu);
+ mmu_audit_disable();
}
-
-static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
-{
- int i;
- struct kvm_mmu_page *sp;
-
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
- return;
- if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
- hpa_t root = vcpu->arch.mmu.root_hpa;
- sp = page_header(root);
- __mmu_spte_walk(vcpu->kvm, sp, fn);
- return;
- }
- for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu.pae_root[i];
-
- if (root && VALID_PAGE(root)) {
- root &= PT64_BASE_ADDR_MASK;
- sp = page_header(root);
- __mmu_spte_walk(vcpu->kvm, sp, fn);
- }
- }
- return;
-}
-
-static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
- gva_t va, int level)
-{
- u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
- int i;
- gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));
-
- for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
- u64 ent = pt[i];
-
- if (ent == shadow_trap_nonpresent_pte)
- continue;
-
- va = canonicalize(va);
- if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
- audit_mappings_page(vcpu, ent, va, level - 1);
- else {
- gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL);
- gfn_t gfn = gpa >> PAGE_SHIFT;
- pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
- hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
-
- if (is_error_pfn(pfn)) {
- kvm_release_pfn_clean(pfn);
- continue;
- }
-
- if (is_shadow_present_pte(ent)
- && (ent & PT64_BASE_ADDR_MASK) != hpa)
- printk(KERN_ERR "xx audit error: (%s) levels %d"
- " gva %lx gpa %llx hpa %llx ent %llx %d\n",
- audit_msg, vcpu->arch.mmu.root_level,
- va, gpa, hpa, ent,
- is_shadow_present_pte(ent));
- else if (ent == shadow_notrap_nonpresent_pte
- && !is_error_hpa(hpa))
- printk(KERN_ERR "audit: (%s) notrap shadow,"
- " valid guest gva %lx\n", audit_msg, va);
- kvm_release_pfn_clean(pfn);
-
- }
- }
-}
-
-static void audit_mappings(struct kvm_vcpu *vcpu)
-{
- unsigned i;
-
- if (vcpu->arch.mmu.root_level == 4)
- audit_mappings_page(vcpu, vcpu->arch.mmu.root_hpa, 0, 4);
- else
- for (i = 0; i < 4; ++i)
- if (vcpu->arch.mmu.pae_root[i] & PT_PRESENT_MASK)
- audit_mappings_page(vcpu,
- vcpu->arch.mmu.pae_root[i],
- i << 30,
- 2);
-}
-
-static int count_rmaps(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = vcpu->kvm;
- struct kvm_memslots *slots;
- int nmaps = 0;
- int i, j, k, idx;
-
- idx = srcu_read_lock(&kvm->srcu);
- slots = kvm_memslots(kvm);
- for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
- struct kvm_memory_slot *m = &slots->memslots[i];
- struct kvm_rmap_desc *d;
-
- for (j = 0; j < m->npages; ++j) {
- unsigned long *rmapp = &m->rmap[j];
-
- if (!*rmapp)
- continue;
- if (!(*rmapp & 1)) {
- ++nmaps;
- continue;
- }
- d = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
- while (d) {
- for (k = 0; k < RMAP_EXT; ++k)
- if (d->sptes[k])
- ++nmaps;
- else
- break;
- d = d->more;
- }
- }
- }
- srcu_read_unlock(&kvm->srcu, idx);
- return nmaps;
-}
-
-void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
-{
- unsigned long *rmapp;
- struct kvm_mmu_page *rev_sp;
- gfn_t gfn;
-
- if (is_writable_pte(*sptep)) {
- rev_sp = page_header(__pa(sptep));
- gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
-
- if (!gfn_to_memslot(kvm, gfn)) {
- if (!printk_ratelimit())
- return;
- printk(KERN_ERR "%s: no memslot for gfn %ld\n",
- audit_msg, gfn);
- printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n",
- audit_msg, (long int)(sptep - rev_sp->spt),
- rev_sp->gfn);
- dump_stack();
- return;
- }
-
- rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
- if (!*rmapp) {
- if (!printk_ratelimit())
- return;
- printk(KERN_ERR "%s: no rmap for writable spte %llx\n",
- audit_msg, *sptep);
- dump_stack();
- }
- }
-
-}
-
-void audit_writable_sptes_have_rmaps(struct kvm_vcpu *vcpu)
-{
- mmu_spte_walk(vcpu, inspect_spte_has_rmap);
-}
-
-static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu)
-{
- struct kvm_mmu_page *sp;
- int i;
-
- list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
- u64 *pt = sp->spt;
-
- if (sp->role.level != PT_PAGE_TABLE_LEVEL)
- continue;
-
- for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
- u64 ent = pt[i];
-
- if (!(ent & PT_PRESENT_MASK))
- continue;
- if (!is_writable_pte(ent))
- continue;
- inspect_spte_has_rmap(vcpu->kvm, &pt[i]);
- }
- }
- return;
-}
-
-static void audit_rmap(struct kvm_vcpu *vcpu)
-{
- check_writable_mappings_rmap(vcpu);
- count_rmaps(vcpu);
-}
-
-static void audit_write_protection(struct kvm_vcpu *vcpu)
-{
- struct kvm_mmu_page *sp;
- struct kvm_memory_slot *slot;
- unsigned long *rmapp;
- u64 *spte;
- gfn_t gfn;
-
- list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
- if (sp->role.direct)
- continue;
- if (sp->unsync)
- continue;
-
- slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
- rmapp = &slot->rmap[gfn - slot->base_gfn];
-
- spte = rmap_next(vcpu->kvm, rmapp, NULL);
- while (spte) {
- if (is_writable_pte(*spte))
- printk(KERN_ERR "%s: (%s) shadow page has "
- "writable mappings: gfn %lx role %x\n",
- __func__, audit_msg, sp->gfn,
- sp->role.word);
- spte = rmap_next(vcpu->kvm, rmapp, spte);
- }
- }
-}
-
-static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
-{
- int olddbg = dbg;
-
- dbg = 0;
- audit_msg = msg;
- audit_rmap(vcpu);
- audit_write_protection(vcpu);
- if (strcmp("pre pte write", audit_msg) != 0)
- audit_mappings(vcpu);
- audit_writable_sptes_have_rmaps(vcpu);
- dbg = olddbg;
-}
-
-#endif
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index be66759..7086ca8 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -49,10 +49,17 @@
#define PFERR_FETCH_MASK (1U << 4)
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
+int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+
+static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
+{
+ return kvm->arch.n_max_mmu_pages -
+ kvm->arch.n_used_mmu_pages;
+}
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
- if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
+ if (unlikely(kvm_mmu_available_pages(vcpu->kvm)< KVM_MIN_FREE_MMU_PAGES))
__kvm_mmu_free_some_pages(vcpu);
}
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
new file mode 100644
index 0000000..ba2bcdd
--- /dev/null
+++ b/arch/x86/kvm/mmu_audit.c
@@ -0,0 +1,299 @@
+/*
+ * mmu_audit.c:
+ *
+ * Audit code for KVM MMU
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ * Authors:
+ * Yaniv Kamay <yaniv@qumranet.com>
+ * Avi Kivity <avi@qumranet.com>
+ * Marcelo Tosatti <mtosatti@redhat.com>
+ * Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/ratelimit.h>
+
+static int audit_point;
+
+#define audit_printk(fmt, args...) \
+ printk(KERN_ERR "audit: (%s) error: " \
+ fmt, audit_point_name[audit_point], ##args)
+
+typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);
+
+static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ inspect_spte_fn fn, int level)
+{
+ int i;
+
+ for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+ u64 *ent = sp->spt;
+
+ fn(vcpu, ent + i, level);
+
+ if (is_shadow_present_pte(ent[i]) &&
+ !is_last_spte(ent[i], level)) {
+ struct kvm_mmu_page *child;
+
+ child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
+ __mmu_spte_walk(vcpu, child, fn, level - 1);
+ }
+ }
+}
+
+static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
+{
+ int i;
+ struct kvm_mmu_page *sp;
+
+ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ return;
+
+ if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
+ hpa_t root = vcpu->arch.mmu.root_hpa;
+
+ sp = page_header(root);
+ __mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
+ return;
+ }
+
+ for (i = 0; i < 4; ++i) {
+ hpa_t root = vcpu->arch.mmu.pae_root[i];
+
+ if (root && VALID_PAGE(root)) {
+ root &= PT64_BASE_ADDR_MASK;
+ sp = page_header(root);
+ __mmu_spte_walk(vcpu, sp, fn, 2);
+ }
+ }
+
+ return;
+}
+
+typedef void (*sp_handler) (struct kvm *kvm, struct kvm_mmu_page *sp);
+
+static void walk_all_active_sps(struct kvm *kvm, sp_handler fn)
+{
+ struct kvm_mmu_page *sp;
+
+ list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link)
+ fn(kvm, sp);
+}
+
+static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
+{
+ struct kvm_mmu_page *sp;
+ gfn_t gfn;
+ pfn_t pfn;
+ hpa_t hpa;
+
+ sp = page_header(__pa(sptep));
+
+ if (sp->unsync) {
+ if (level != PT_PAGE_TABLE_LEVEL) {
+ audit_printk("unsync sp: %p level = %d\n", sp, level);
+ return;
+ }
+
+ if (*sptep == shadow_notrap_nonpresent_pte) {
+ audit_printk("notrap spte in unsync sp: %p\n", sp);
+ return;
+ }
+ }
+
+ if (sp->role.direct && *sptep == shadow_notrap_nonpresent_pte) {
+ audit_printk("notrap spte in direct sp: %p\n", sp);
+ return;
+ }
+
+ if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
+ return;
+
+ gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
+ pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);
+
+ if (is_error_pfn(pfn)) {
+ kvm_release_pfn_clean(pfn);
+ return;
+ }
+
+ hpa = pfn << PAGE_SHIFT;
+ if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
+ audit_printk("levels %d pfn %llx hpa %llx ent %llxn",
+ vcpu->arch.mmu.root_level, pfn, hpa, *sptep);
+}
+
+static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
+{
+ unsigned long *rmapp;
+ struct kvm_mmu_page *rev_sp;
+ gfn_t gfn;
+
+
+ rev_sp = page_header(__pa(sptep));
+ gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
+
+ if (!gfn_to_memslot(kvm, gfn)) {
+ if (!printk_ratelimit())
+ return;
+ audit_printk("no memslot for gfn %llx\n", gfn);
+ audit_printk("index %ld of sp (gfn=%llx)\n",
+ (long int)(sptep - rev_sp->spt), rev_sp->gfn);
+ dump_stack();
+ return;
+ }
+
+ rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
+ if (!*rmapp) {
+ if (!printk_ratelimit())
+ return;
+ audit_printk("no rmap for writable spte %llx\n", *sptep);
+ dump_stack();
+ }
+}
+
+static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
+{
+ if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep, level))
+ inspect_spte_has_rmap(vcpu->kvm, sptep);
+}
+
+static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
+{
+ struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+ if (audit_point == AUDIT_POST_SYNC && sp->unsync)
+ audit_printk("meet unsync sp(%p) after sync root.\n", sp);
+}
+
+static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ int i;
+
+ if (sp->role.level != PT_PAGE_TABLE_LEVEL)
+ return;
+
+ for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+ if (!is_rmap_spte(sp->spt[i]))
+ continue;
+
+ inspect_spte_has_rmap(kvm, sp->spt + i);
+ }
+}
+
+static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ struct kvm_memory_slot *slot;
+ unsigned long *rmapp;
+ u64 *spte;
+
+ if (sp->role.direct || sp->unsync || sp->role.invalid)
+ return;
+
+ slot = gfn_to_memslot(kvm, sp->gfn);
+ rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
+
+ spte = rmap_next(kvm, rmapp, NULL);
+ while (spte) {
+ if (is_writable_pte(*spte))
+ audit_printk("shadow page has writable mappings: gfn "
+ "%llx role %x\n", sp->gfn, sp->role.word);
+ spte = rmap_next(kvm, rmapp, spte);
+ }
+}
+
+static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ check_mappings_rmap(kvm, sp);
+ audit_write_protection(kvm, sp);
+}
+
+static void audit_all_active_sps(struct kvm *kvm)
+{
+ walk_all_active_sps(kvm, audit_sp);
+}
+
+static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level)
+{
+ audit_sptes_have_rmaps(vcpu, sptep, level);
+ audit_mappings(vcpu, sptep, level);
+ audit_spte_after_sync(vcpu, sptep, level);
+}
+
+static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
+{
+ mmu_spte_walk(vcpu, audit_spte);
+}
+
+static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point)
+{
+ static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
+
+ if (!__ratelimit(&ratelimit_state))
+ return;
+
+ audit_point = point;
+ audit_all_active_sps(vcpu->kvm);
+ audit_vcpu_spte(vcpu);
+}
+
+static bool mmu_audit;
+
+static void mmu_audit_enable(void)
+{
+ int ret;
+
+ if (mmu_audit)
+ return;
+
+ ret = register_trace_kvm_mmu_audit(kvm_mmu_audit, NULL);
+ WARN_ON(ret);
+
+ mmu_audit = true;
+}
+
+static void mmu_audit_disable(void)
+{
+ if (!mmu_audit)
+ return;
+
+ unregister_trace_kvm_mmu_audit(kvm_mmu_audit, NULL);
+ tracepoint_synchronize_unregister();
+ mmu_audit = false;
+}
+
+static int mmu_audit_set(const char *val, const struct kernel_param *kp)
+{
+ int ret;
+ unsigned long enable;
+
+ ret = strict_strtoul(val, 10, &enable);
+ if (ret < 0)
+ return -EINVAL;
+
+ switch (enable) {
+ case 0:
+ mmu_audit_disable();
+ break;
+ case 1:
+ mmu_audit_enable();
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct kernel_param_ops audit_param_ops = {
+ .set = mmu_audit_set,
+ .get = param_get_bool,
+};
+
+module_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 3aab0f0..b60b4fd 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -195,6 +195,25 @@
TP_ARGS(sp)
);
+
+TRACE_EVENT(
+ kvm_mmu_audit,
+ TP_PROTO(struct kvm_vcpu *vcpu, int audit_point),
+ TP_ARGS(vcpu, audit_point),
+
+ TP_STRUCT__entry(
+ __field(struct kvm_vcpu *, vcpu)
+ __field(int, audit_point)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->audit_point = audit_point;
+ ),
+
+ TP_printk("vcpu:%d %s", __entry->vcpu->cpu,
+ audit_point_name[__entry->audit_point])
+);
#endif /* _TRACE_KVMMMU_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 51ef909..cd7a833 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -7,7 +7,7 @@
* MMU support
*
* Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
@@ -67,6 +67,7 @@
int level;
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
pt_element_t ptes[PT_MAX_FULL_LEVELS];
+ pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
unsigned pt_access;
unsigned pte_access;
@@ -104,7 +105,7 @@
access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
#if PTTYPE == 64
- if (is_nx(vcpu))
+ if (vcpu->arch.mmu.nx)
access &= ~(gpte >> PT64_NX_SHIFT);
#endif
return access;
@@ -113,26 +114,32 @@
/*
* Fetch a guest pte for a guest virtual address
*/
-static int FNAME(walk_addr)(struct guest_walker *walker,
- struct kvm_vcpu *vcpu, gva_t addr,
- int write_fault, int user_fault, int fetch_fault)
+static int FNAME(walk_addr_generic)(struct guest_walker *walker,
+ struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gva_t addr, u32 access)
{
pt_element_t pte;
gfn_t table_gfn;
unsigned index, pt_access, uninitialized_var(pte_access);
gpa_t pte_gpa;
bool eperm, present, rsvd_fault;
+ int offset, write_fault, user_fault, fetch_fault;
+
+ write_fault = access & PFERR_WRITE_MASK;
+ user_fault = access & PFERR_USER_MASK;
+ fetch_fault = access & PFERR_FETCH_MASK;
trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
fetch_fault);
walk:
present = true;
eperm = rsvd_fault = false;
- walker->level = vcpu->arch.mmu.root_level;
- pte = vcpu->arch.cr3;
+ walker->level = mmu->root_level;
+ pte = mmu->get_cr3(vcpu);
+
#if PTTYPE == 64
- if (!is_long_mode(vcpu)) {
- pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
+ if (walker->level == PT32E_ROOT_LEVEL) {
+ pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3);
trace_kvm_mmu_paging_element(pte, walker->level);
if (!is_present_gpte(pte)) {
present = false;
@@ -142,7 +149,7 @@
}
#endif
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
- (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
+ (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
pt_access = ACC_ALL;
@@ -150,12 +157,14 @@
index = PT_INDEX(addr, walker->level);
table_gfn = gpte_to_gfn(pte);
- pte_gpa = gfn_to_gpa(table_gfn);
- pte_gpa += index * sizeof(pt_element_t);
+ offset = index * sizeof(pt_element_t);
+ pte_gpa = gfn_to_gpa(table_gfn) + offset;
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
- if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) {
+ if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte,
+ offset, sizeof(pte),
+ PFERR_USER_MASK|PFERR_WRITE_MASK)) {
present = false;
break;
}
@@ -167,7 +176,7 @@
break;
}
- if (is_rsvd_bits_set(vcpu, pte, walker->level)) {
+ if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) {
rsvd_fault = true;
break;
}
@@ -204,17 +213,28 @@
(PTTYPE == 64 || is_pse(vcpu))) ||
((walker->level == PT_PDPE_LEVEL) &&
is_large_pte(pte) &&
- is_long_mode(vcpu))) {
+ mmu->root_level == PT64_ROOT_LEVEL)) {
int lvl = walker->level;
+ gpa_t real_gpa;
+ gfn_t gfn;
+ u32 ac;
- walker->gfn = gpte_to_gfn_lvl(pte, lvl);
- walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
- >> PAGE_SHIFT;
+ gfn = gpte_to_gfn_lvl(pte, lvl);
+ gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
if (PTTYPE == 32 &&
walker->level == PT_DIRECTORY_LEVEL &&
is_cpuid_PSE36())
- walker->gfn += pse36_gfn_delta(pte);
+ gfn += pse36_gfn_delta(pte);
+
+ ac = write_fault | fetch_fault | user_fault;
+
+ real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
+ ac);
+ if (real_gpa == UNMAPPED_GVA)
+ return 0;
+
+ walker->gfn = real_gpa >> PAGE_SHIFT;
break;
}
@@ -249,18 +269,36 @@
walker->error_code = 0;
if (present)
walker->error_code |= PFERR_PRESENT_MASK;
- if (write_fault)
- walker->error_code |= PFERR_WRITE_MASK;
- if (user_fault)
- walker->error_code |= PFERR_USER_MASK;
- if (fetch_fault && is_nx(vcpu))
+
+ walker->error_code |= write_fault | user_fault;
+
+ if (fetch_fault && mmu->nx)
walker->error_code |= PFERR_FETCH_MASK;
if (rsvd_fault)
walker->error_code |= PFERR_RSVD_MASK;
+
+ vcpu->arch.fault.address = addr;
+ vcpu->arch.fault.error_code = walker->error_code;
+
trace_kvm_mmu_walker_error(walker->error_code);
return 0;
}
+static int FNAME(walk_addr)(struct guest_walker *walker,
+ struct kvm_vcpu *vcpu, gva_t addr, u32 access)
+{
+ return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr,
+ access);
+}
+
+static int FNAME(walk_addr_nested)(struct guest_walker *walker,
+ struct kvm_vcpu *vcpu, gva_t addr,
+ u32 access)
+{
+ return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu,
+ addr, access);
+}
+
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte)
{
@@ -302,14 +340,87 @@
static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
struct guest_walker *gw, int level)
{
- int r;
pt_element_t curr_pte;
+ gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1];
+ u64 mask;
+ int r, index;
- r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1],
+ if (level == PT_PAGE_TABLE_LEVEL) {
+ mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1;
+ base_gpa = pte_gpa & ~mask;
+ index = (pte_gpa - base_gpa) / sizeof(pt_element_t);
+
+ r = kvm_read_guest_atomic(vcpu->kvm, base_gpa,
+ gw->prefetch_ptes, sizeof(gw->prefetch_ptes));
+ curr_pte = gw->prefetch_ptes[index];
+ } else
+ r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa,
&curr_pte, sizeof(curr_pte));
+
return r || curr_pte != gw->ptes[level - 1];
}
+static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
+ u64 *sptep)
+{
+ struct kvm_mmu_page *sp;
+ struct kvm_mmu *mmu = &vcpu->arch.mmu;
+ pt_element_t *gptep = gw->prefetch_ptes;
+ u64 *spte;
+ int i;
+
+ sp = page_header(__pa(sptep));
+
+ if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+ return;
+
+ if (sp->role.direct)
+ return __direct_pte_prefetch(vcpu, sp, sptep);
+
+ i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
+ spte = sp->spt + i;
+
+ for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
+ pt_element_t gpte;
+ unsigned pte_access;
+ gfn_t gfn;
+ pfn_t pfn;
+ bool dirty;
+
+ if (spte == sptep)
+ continue;
+
+ if (*spte != shadow_trap_nonpresent_pte)
+ continue;
+
+ gpte = gptep[i];
+
+ if (!is_present_gpte(gpte) ||
+ is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) {
+ if (!sp->unsync)
+ __set_spte(spte, shadow_notrap_nonpresent_pte);
+ continue;
+ }
+
+ if (!(gpte & PT_ACCESSED_MASK))
+ continue;
+
+ pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+ gfn = gpte_to_gfn(gpte);
+ dirty = is_dirty_gpte(gpte);
+ pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
+ (pte_access & ACC_WRITE_MASK) && dirty);
+ if (is_error_pfn(pfn)) {
+ kvm_release_pfn_clean(pfn);
+ break;
+ }
+
+ mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
+ dirty, NULL, PT_PAGE_TABLE_LEVEL, gfn,
+ pfn, true, true);
+ }
+}
+
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
*/
@@ -391,6 +502,7 @@
mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
user_fault, write_fault, dirty, ptwrite, it.level,
gw->gfn, pfn, false, true);
+ FNAME(pte_prefetch)(vcpu, gw, it.sptep);
return it.sptep;
@@ -420,7 +532,6 @@
{
int write_fault = error_code & PFERR_WRITE_MASK;
int user_fault = error_code & PFERR_USER_MASK;
- int fetch_fault = error_code & PFERR_FETCH_MASK;
struct guest_walker walker;
u64 *sptep;
int write_pt = 0;
@@ -430,7 +541,6 @@
unsigned long mmu_seq;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
- kvm_mmu_audit(vcpu, "pre page fault");
r = mmu_topup_memory_caches(vcpu);
if (r)
@@ -439,15 +549,14 @@
/*
* Look up the guest pte for the faulting address.
*/
- r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
- fetch_fault);
+ r = FNAME(walk_addr)(&walker, vcpu, addr, error_code);
/*
* The page is not mapped by the guest. Let the guest handle it.
*/
if (!r) {
pgprintk("%s: guest page fault\n", __func__);
- inject_page_fault(vcpu, addr, walker.error_code);
+ inject_page_fault(vcpu);
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
return 0;
}
@@ -468,6 +577,8 @@
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock;
+
+ trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
kvm_mmu_free_some_pages(vcpu);
sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
level, &write_pt, pfn);
@@ -479,7 +590,7 @@
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
++vcpu->stat.pf_fixed;
- kvm_mmu_audit(vcpu, "post page fault (fixed)");
+ trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
spin_unlock(&vcpu->kvm->mmu_lock);
return write_pt;
@@ -556,10 +667,25 @@
gpa_t gpa = UNMAPPED_GVA;
int r;
- r = FNAME(walk_addr)(&walker, vcpu, vaddr,
- !!(access & PFERR_WRITE_MASK),
- !!(access & PFERR_USER_MASK),
- !!(access & PFERR_FETCH_MASK));
+ r = FNAME(walk_addr)(&walker, vcpu, vaddr, access);
+
+ if (r) {
+ gpa = gfn_to_gpa(walker.gfn);
+ gpa |= vaddr & ~PAGE_MASK;
+ } else if (error)
+ *error = walker.error_code;
+
+ return gpa;
+}
+
+static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
+ u32 access, u32 *error)
+{
+ struct guest_walker walker;
+ gpa_t gpa = UNMAPPED_GVA;
+ int r;
+
+ r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);
if (r) {
gpa = gfn_to_gpa(walker.gfn);
@@ -638,7 +764,7 @@
return -EINVAL;
gfn = gpte_to_gfn(gpte);
- if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)
+ if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)
|| gfn != sp->gfns[i] || !is_present_gpte(gpte)
|| !(gpte & PT_ACCESSED_MASK)) {
u64 nonpresent;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8a3f9f6..82e144a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4,7 +4,7 @@
* AMD SVM support
*
* Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
@@ -88,6 +88,14 @@
/* A VMEXIT is required but not yet emulated */
bool exit_required;
+ /*
+ * If we vmexit during an instruction emulation we need this to restore
+ * the l1 guest rip after the emulation
+ */
+ unsigned long vmexit_rip;
+ unsigned long vmexit_rsp;
+ unsigned long vmexit_rax;
+
/* cache for intercepts of the guest */
u16 intercept_cr_read;
u16 intercept_cr_write;
@@ -96,6 +104,8 @@
u32 intercept_exceptions;
u64 intercept;
+ /* Nested Paging related state */
+ u64 nested_cr3;
};
#define MSRPM_OFFSETS 16
@@ -284,6 +294,15 @@
force_new_asid(vcpu);
}
+static int get_npt_level(void)
+{
+#ifdef CONFIG_X86_64
+ return PT64_ROOT_LEVEL;
+#else
+ return PT32E_ROOT_LEVEL;
+#endif
+}
+
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
vcpu->arch.efer = efer;
@@ -701,6 +720,29 @@
seg->base = 0;
}
+static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u64 g_tsc_offset = 0;
+
+ if (is_nested(svm)) {
+ g_tsc_offset = svm->vmcb->control.tsc_offset -
+ svm->nested.hsave->control.tsc_offset;
+ svm->nested.hsave->control.tsc_offset = offset;
+ }
+
+ svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
+}
+
+static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->vmcb->control.tsc_offset += adjustment;
+ if (is_nested(svm))
+ svm->nested.hsave->control.tsc_offset += adjustment;
+}
+
static void init_vmcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -793,7 +835,7 @@
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- save->efer = EFER_SVME;
+ svm_set_efer(&svm->vcpu, 0);
save->dr6 = 0xffff0ff0;
save->dr7 = 0x400;
save->rflags = 2;
@@ -804,8 +846,8 @@
* This is the guest-visible cr0 value.
* svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
*/
- svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
- (void)kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
+ svm->vcpu.arch.cr0 = 0;
+ (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
save->cr4 = X86_CR4_PAE;
/* rdx = ?? */
@@ -901,7 +943,7 @@
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
init_vmcb(svm);
- svm->vmcb->control.tsc_offset = 0-native_read_tsc();
+ kvm_write_tsc(&svm->vcpu, 0);
err = fx_init(&svm->vcpu);
if (err)
@@ -947,20 +989,6 @@
int i;
if (unlikely(cpu != vcpu->cpu)) {
- u64 delta;
-
- if (check_tsc_unstable()) {
- /*
- * Make sure that the guest sees a monotonically
- * increasing TSC.
- */
- delta = vcpu->arch.host_tsc - native_read_tsc();
- svm->vmcb->control.tsc_offset += delta;
- if (is_nested(svm))
- svm->nested.hsave->control.tsc_offset += delta;
- }
- vcpu->cpu = cpu;
- kvm_migrate_timers(vcpu);
svm->asid_generation = 0;
}
@@ -976,8 +1004,6 @@
++vcpu->stat.host_state_reload;
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
-
- vcpu->arch.host_tsc = native_read_tsc();
}
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -995,7 +1021,7 @@
switch (reg) {
case VCPU_EXREG_PDPTR:
BUG_ON(!npt_enabled);
- load_pdptrs(vcpu, vcpu->arch.cr3);
+ load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
break;
default:
BUG();
@@ -1206,8 +1232,12 @@
if (old == new) {
/* cr0 write with ts and mp unchanged */
svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
- if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
+ if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) {
+ svm->nested.vmexit_rip = kvm_rip_read(vcpu);
+ svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
+ svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
return;
+ }
}
}
@@ -1581,6 +1611,54 @@
return 1;
}
+static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ return svm->nested.nested_cr3;
+}
+
+static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
+ unsigned long root)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->vmcb->control.nested_cr3 = root;
+ force_new_asid(vcpu);
+}
+
+static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->vmcb->control.exit_code = SVM_EXIT_NPF;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = vcpu->arch.fault.error_code;
+ svm->vmcb->control.exit_info_2 = vcpu->arch.fault.address;
+
+ nested_svm_vmexit(svm);
+}
+
+static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
+
+ vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
+ vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
+ vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
+ vcpu->arch.mmu.shadow_root_level = get_npt_level();
+ vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
+
+ return r;
+}
+
+static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.walk_mmu = &vcpu->arch.mmu;
+}
+
static int nested_svm_check_permissions(struct vcpu_svm *svm)
{
if (!(svm->vcpu.arch.efer & EFER_SVME)
@@ -1629,6 +1707,14 @@
if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
return false;
+ /*
+ * if vmexit was already requested (by intercepted exception
+ * for instance) do not overwrite it with "external interrupt"
+ * vmexit.
+ */
+ if (svm->nested.exit_required)
+ return false;
+
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
@@ -1896,6 +1982,7 @@
nested_vmcb->save.ds = vmcb->save.ds;
nested_vmcb->save.gdtr = vmcb->save.gdtr;
nested_vmcb->save.idtr = vmcb->save.idtr;
+ nested_vmcb->save.efer = svm->vcpu.arch.efer;
nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
nested_vmcb->save.cr3 = svm->vcpu.arch.cr3;
nested_vmcb->save.cr2 = vmcb->save.cr2;
@@ -1917,6 +2004,7 @@
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+ nested_vmcb->control.next_rip = vmcb->control.next_rip;
/*
* If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
@@ -1947,6 +2035,8 @@
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
+ svm->nested.nested_cr3 = 0;
+
/* Restore selected save entries */
svm->vmcb->save.es = hsave->save.es;
svm->vmcb->save.cs = hsave->save.cs;
@@ -1973,6 +2063,7 @@
nested_svm_unmap(page);
+ nested_svm_uninit_mmu_context(&svm->vcpu);
kvm_mmu_reset_context(&svm->vcpu);
kvm_mmu_load(&svm->vcpu);
@@ -2012,6 +2103,20 @@
return true;
}
+static bool nested_vmcb_checks(struct vmcb *vmcb)
+{
+ if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
+ return false;
+
+ if (vmcb->control.asid == 0)
+ return false;
+
+ if (vmcb->control.nested_ctl && !npt_enabled)
+ return false;
+
+ return true;
+}
+
static bool nested_svm_vmrun(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
@@ -2026,7 +2131,18 @@
if (!nested_vmcb)
return false;
- trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa,
+ if (!nested_vmcb_checks(nested_vmcb)) {
+ nested_vmcb->control.exit_code = SVM_EXIT_ERR;
+ nested_vmcb->control.exit_code_hi = 0;
+ nested_vmcb->control.exit_info_1 = 0;
+ nested_vmcb->control.exit_info_2 = 0;
+
+ nested_svm_unmap(page);
+
+ return false;
+ }
+
+ trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
nested_vmcb->save.rip,
nested_vmcb->control.int_ctl,
nested_vmcb->control.event_inj,
@@ -2055,7 +2171,7 @@
hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
hsave->save.cr4 = svm->vcpu.arch.cr4;
hsave->save.rflags = vmcb->save.rflags;
- hsave->save.rip = svm->next_rip;
+ hsave->save.rip = kvm_rip_read(&svm->vcpu);
hsave->save.rsp = vmcb->save.rsp;
hsave->save.rax = vmcb->save.rax;
if (npt_enabled)
@@ -2070,6 +2186,12 @@
else
svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+ if (nested_vmcb->control.nested_ctl) {
+ kvm_mmu_unload(&svm->vcpu);
+ svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
+ nested_svm_init_mmu_context(&svm->vcpu);
+ }
+
/* Load the nested guest state */
svm->vmcb->save.es = nested_vmcb->save.es;
svm->vmcb->save.cs = nested_vmcb->save.cs;
@@ -2227,8 +2349,8 @@
if (nested_svm_check_permissions(svm))
return 1;
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
+ /* Save rip after vmrun instruction */
+ kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
if (!nested_svm_vmrun(svm))
return 1;
@@ -2257,6 +2379,7 @@
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
enable_gif(svm);
@@ -2399,6 +2522,23 @@
return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE;
}
+static int cr0_write_interception(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ int r;
+
+ r = emulate_instruction(&svm->vcpu, 0, 0, 0);
+
+ if (svm->nested.vmexit_rip) {
+ kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip);
+ kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax);
+ svm->nested.vmexit_rip = 0;
+ }
+
+ return r == EMULATE_DONE;
+}
+
static int cr8_write_interception(struct vcpu_svm *svm)
{
struct kvm_run *kvm_run = svm->vcpu.run;
@@ -2542,20 +2682,9 @@
struct vcpu_svm *svm = to_svm(vcpu);
switch (ecx) {
- case MSR_IA32_TSC: {
- u64 tsc_offset = data - native_read_tsc();
- u64 g_tsc_offset = 0;
-
- if (is_nested(svm)) {
- g_tsc_offset = svm->vmcb->control.tsc_offset -
- svm->nested.hsave->control.tsc_offset;
- svm->nested.hsave->control.tsc_offset = tsc_offset;
- }
-
- svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
-
+ case MSR_IA32_TSC:
+ kvm_write_tsc(vcpu, data);
break;
- }
case MSR_STAR:
svm->vmcb->save.star = data;
break;
@@ -2643,6 +2772,7 @@
{
struct kvm_run *kvm_run = svm->vcpu.run;
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
/*
@@ -2672,7 +2802,7 @@
[SVM_EXIT_READ_CR4] = emulate_on_interception,
[SVM_EXIT_READ_CR8] = emulate_on_interception,
[SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
- [SVM_EXIT_WRITE_CR0] = emulate_on_interception,
+ [SVM_EXIT_WRITE_CR0] = cr0_write_interception,
[SVM_EXIT_WRITE_CR3] = emulate_on_interception,
[SVM_EXIT_WRITE_CR4] = emulate_on_interception,
[SVM_EXIT_WRITE_CR8] = cr8_write_interception,
@@ -2871,7 +3001,8 @@
if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
- exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
+ exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
+ exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
"exit_code 0x%x\n",
__func__, svm->vmcb->control.exit_int_info,
@@ -3088,8 +3219,10 @@
svm->int3_injected = 0;
- if (svm->vcpu.arch.hflags & HF_IRET_MASK)
+ if (svm->vcpu.arch.hflags & HF_IRET_MASK) {
svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ }
svm->vcpu.arch.nmi_injected = false;
kvm_clear_exception_queue(&svm->vcpu);
@@ -3098,6 +3231,8 @@
if (!(exitintinfo & SVM_EXITINTINFO_VALID))
return;
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+
vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
@@ -3134,6 +3269,17 @@
}
}
+static void svm_cancel_injection(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_control_area *control = &svm->vmcb->control;
+
+ control->exit_int_info = control->event_inj;
+ control->exit_int_info_err = control->event_inj_err;
+ control->event_inj = 0;
+ svm_complete_interrupts(svm);
+}
+
#ifdef CONFIG_X86_64
#define R "r"
#else
@@ -3167,9 +3313,6 @@
savesegment(gs, gs_selector);
ldt_selector = kvm_read_ldt();
svm->vmcb->save.cr2 = vcpu->arch.cr2;
- /* required for live migration with NPT */
- if (npt_enabled)
- svm->vmcb->save.cr3 = vcpu->arch.cr3;
clgi();
@@ -3291,16 +3434,22 @@
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (npt_enabled) {
- svm->vmcb->control.nested_cr3 = root;
- force_new_asid(vcpu);
- return;
- }
-
svm->vmcb->save.cr3 = root;
force_new_asid(vcpu);
}
+static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->vmcb->control.nested_cr3 = root;
+
+ /* Also sync guest cr3 here in case we live migrate */
+ svm->vmcb->save.cr3 = vcpu->arch.cr3;
+
+ force_new_asid(vcpu);
+}
+
static int is_disabled(void)
{
u64 vm_cr;
@@ -3333,15 +3482,6 @@
return false;
}
-static int get_npt_level(void)
-{
-#ifdef CONFIG_X86_64
- return PT64_ROOT_LEVEL;
-#else
- return PT32E_ROOT_LEVEL;
-#endif
-}
-
static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
return 0;
@@ -3354,12 +3494,25 @@
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
switch (func) {
+ case 0x80000001:
+ if (nested)
+ entry->ecx |= (1 << 2); /* Set SVM bit */
+ break;
case 0x8000000A:
entry->eax = 1; /* SVM revision 1 */
entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
ASID emulation to nested SVM */
entry->ecx = 0; /* Reserved */
- entry->edx = 0; /* Do not support any additional features */
+ entry->edx = 0; /* Per default do not support any
+ additional features */
+
+ /* Support next_rip if host supports it */
+ if (svm_has(SVM_FEATURE_NRIP))
+ entry->edx |= SVM_FEATURE_NRIP;
+
+ /* Support NPT for the guest if enabled */
+ if (npt_enabled)
+ entry->edx |= SVM_FEATURE_NPT;
break;
}
@@ -3497,6 +3650,7 @@
.set_irq = svm_set_irq,
.set_nmi = svm_inject_nmi,
.queue_exception = svm_queue_exception,
+ .cancel_injection = svm_cancel_injection,
.interrupt_allowed = svm_interrupt_allowed,
.nmi_allowed = svm_nmi_allowed,
.get_nmi_mask = svm_get_nmi_mask,
@@ -3519,6 +3673,11 @@
.set_supported_cpuid = svm_set_supported_cpuid,
.has_wbinvd_exit = svm_has_wbinvd_exit,
+
+ .write_tsc_offset = svm_write_tsc_offset,
+ .adjust_tsc_offset = svm_adjust_tsc_offset,
+
+ .set_tdp_cr3 = set_tdp_cr3,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
index e16a0db..fc7a101 100644
--- a/arch/x86/kvm/timer.c
+++ b/arch/x86/kvm/timer.c
@@ -6,7 +6,7 @@
*
* timer support
*
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7bddfab..8da0e45 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5,7 +5,7 @@
* machines without emulation or binary translation.
*
* Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Avi Kivity <avi@qumranet.com>
@@ -125,6 +125,7 @@
unsigned long host_rsp;
int launched;
u8 fail;
+ u32 exit_intr_info;
u32 idt_vectoring_info;
struct shared_msr_entry *guest_msrs;
int nmsrs;
@@ -154,11 +155,6 @@
u32 limit;
u32 ar;
} tr, es, ds, fs, gs;
- struct {
- bool pending;
- u8 vector;
- unsigned rip;
- } irq;
} rmode;
int vpid;
bool emulation_required;
@@ -505,7 +501,6 @@
vmcs_clear(vmx->vmcs);
if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
per_cpu(current_vmcs, cpu) = NULL;
- rdtscll(vmx->vcpu.arch.host_tsc);
list_del(&vmx->local_vcpus_link);
vmx->vcpu.cpu = -1;
vmx->launched = 0;
@@ -706,11 +701,10 @@
/*
* VT restores TR but not its size. Useless.
*/
- struct desc_ptr gdt;
+ struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
struct desc_struct *descs;
- native_store_gdt(&gdt);
- descs = (void *)gdt.address;
+ descs = (void *)gdt->address;
descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
load_TR_desc();
}
@@ -753,7 +747,7 @@
static unsigned long segment_base(u16 selector)
{
- struct desc_ptr gdt;
+ struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
struct desc_struct *d;
unsigned long table_base;
unsigned long v;
@@ -761,8 +755,7 @@
if (!(selector & ~3))
return 0;
- native_store_gdt(&gdt);
- table_base = gdt.address;
+ table_base = gdt->address;
if (selector & 4) { /* from ldt */
u16 ldt_selector = kvm_read_ldt();
@@ -883,7 +876,6 @@
static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 tsc_this, delta, new_offset;
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
if (!vmm_exclusive)
@@ -897,37 +889,24 @@
}
if (vcpu->cpu != cpu) {
- struct desc_ptr dt;
+ struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
unsigned long sysenter_esp;
- kvm_migrate_timers(vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
local_irq_disable();
list_add(&vmx->local_vcpus_link,
&per_cpu(vcpus_on_cpu, cpu));
local_irq_enable();
- vcpu->cpu = cpu;
/*
* Linux uses per-cpu TSS and GDT, so set these when switching
* processors.
*/
vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
- native_store_gdt(&dt);
- vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */
+ vmcs_writel(HOST_GDTR_BASE, gdt->address); /* 22.2.4 */
rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
-
- /*
- * Make sure the time stamp counter is monotonous.
- */
- rdtscll(tsc_this);
- if (tsc_this < vcpu->arch.host_tsc) {
- delta = vcpu->arch.host_tsc - tsc_this;
- new_offset = vmcs_read64(TSC_OFFSET) + delta;
- vmcs_write64(TSC_OFFSET, new_offset);
- }
}
}
@@ -1044,16 +1023,8 @@
}
if (vmx->rmode.vm86_active) {
- vmx->rmode.irq.pending = true;
- vmx->rmode.irq.vector = nr;
- vmx->rmode.irq.rip = kvm_rip_read(vcpu);
- if (kvm_exception_is_soft(nr))
- vmx->rmode.irq.rip +=
- vmx->vcpu.arch.event_exit_inst_len;
- intr_info |= INTR_TYPE_SOFT_INTR;
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
- kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+ if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE)
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
@@ -1149,12 +1120,17 @@
}
/*
- * writes 'guest_tsc' into guest's timestamp counter "register"
- * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
+ * writes 'offset' into guest's timestamp counter offset register
*/
-static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
+static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
- vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
+ vmcs_write64(TSC_OFFSET, offset);
+}
+
+static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+{
+ u64 offset = vmcs_read64(TSC_OFFSET);
+ vmcs_write64(TSC_OFFSET, offset + adjustment);
}
/*
@@ -1227,7 +1203,6 @@
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct shared_msr_entry *msr;
- u64 host_tsc;
int ret = 0;
switch (msr_index) {
@@ -1257,8 +1232,7 @@
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_TSC:
- rdtscll(host_tsc);
- guest_write_tsc(data, host_tsc);
+ kvm_write_tsc(vcpu, data);
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -1856,20 +1830,20 @@
return;
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
- vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
- vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
- vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
- vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
+ vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]);
+ vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]);
+ vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]);
+ vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]);
}
}
static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
{
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
- vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
- vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
- vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
- vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+ vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+ vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+ vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+ vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
}
__set_bit(VCPU_EXREG_PDPTR,
@@ -2515,7 +2489,7 @@
{
u32 host_sysenter_cs, msr_low, msr_high;
u32 junk;
- u64 host_pat, tsc_this, tsc_base;
+ u64 host_pat;
unsigned long a;
struct desc_ptr dt;
int i;
@@ -2656,12 +2630,7 @@
vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
- tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
- rdtscll(tsc_this);
- if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
- tsc_base = tsc_this;
-
- guest_write_tsc(0, tsc_base);
+ kvm_write_tsc(&vmx->vcpu, 0);
return 0;
}
@@ -2834,16 +2803,8 @@
++vcpu->stat.irq_injections;
if (vmx->rmode.vm86_active) {
- vmx->rmode.irq.pending = true;
- vmx->rmode.irq.vector = irq;
- vmx->rmode.irq.rip = kvm_rip_read(vcpu);
- if (vcpu->arch.interrupt.soft)
- vmx->rmode.irq.rip +=
- vmx->vcpu.arch.event_exit_inst_len;
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
- kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+ if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE)
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
intr = irq | INTR_INFO_VALID_MASK;
@@ -2875,14 +2836,8 @@
++vcpu->stat.nmi_injections;
if (vmx->rmode.vm86_active) {
- vmx->rmode.irq.pending = true;
- vmx->rmode.irq.vector = NMI_VECTOR;
- vmx->rmode.irq.rip = kvm_rip_read(vcpu);
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- NMI_VECTOR | INTR_TYPE_SOFT_INTR |
- INTR_INFO_VALID_MASK);
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
- kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+ if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE)
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
@@ -3346,6 +3301,7 @@
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
{
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
}
@@ -3358,6 +3314,8 @@
cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
++vcpu->stat.irq_window_exits;
/*
@@ -3614,6 +3572,7 @@
cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
++vcpu->stat.nmi_window_exits;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
}
@@ -3623,8 +3582,17 @@
struct vcpu_vmx *vmx = to_vmx(vcpu);
enum emulation_result err = EMULATE_DONE;
int ret = 1;
+ u32 cpu_exec_ctrl;
+ bool intr_window_requested;
+
+ cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
while (!guest_state_valid(vcpu)) {
+ if (intr_window_requested
+ && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF))
+ return handle_interrupt_window(&vmx->vcpu);
+
err = emulate_instruction(vcpu, 0, 0, 0);
if (err == EMULATE_DO_MMIO) {
@@ -3790,18 +3758,9 @@
vmcs_write32(TPR_THRESHOLD, irr);
}
-static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
+static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
{
- u32 exit_intr_info;
- u32 idt_vectoring_info = vmx->idt_vectoring_info;
- bool unblock_nmi;
- u8 vector;
- int type;
- bool idtv_info_valid;
-
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
- vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
+ u32 exit_intr_info = vmx->exit_intr_info;
/* Handle machine checks before interrupts are enabled */
if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
@@ -3816,8 +3775,16 @@
asm("int $2");
kvm_after_handle_nmi(&vmx->vcpu);
}
+}
- idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
+static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
+{
+ u32 exit_intr_info = vmx->exit_intr_info;
+ bool unblock_nmi;
+ u8 vector;
+ bool idtv_info_valid;
+
+ idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
if (cpu_has_virtual_nmis()) {
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
@@ -3839,6 +3806,18 @@
} else if (unlikely(vmx->soft_vnmi_blocked))
vmx->vnmi_blocked_time +=
ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
+}
+
+static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
+ u32 idt_vectoring_info,
+ int instr_len_field,
+ int error_code_field)
+{
+ u8 vector;
+ int type;
+ bool idtv_info_valid;
+
+ idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
vmx->vcpu.arch.nmi_injected = false;
kvm_clear_exception_queue(&vmx->vcpu);
@@ -3847,6 +3826,8 @@
if (!idtv_info_valid)
return;
+ kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
+
vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
@@ -3863,18 +3844,18 @@
break;
case INTR_TYPE_SOFT_EXCEPTION:
vmx->vcpu.arch.event_exit_inst_len =
- vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ vmcs_read32(instr_len_field);
/* fall through */
case INTR_TYPE_HARD_EXCEPTION:
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
- u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
+ u32 err = vmcs_read32(error_code_field);
kvm_queue_exception_e(&vmx->vcpu, vector, err);
} else
kvm_queue_exception(&vmx->vcpu, vector);
break;
case INTR_TYPE_SOFT_INTR:
vmx->vcpu.arch.event_exit_inst_len =
- vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ vmcs_read32(instr_len_field);
/* fall through */
case INTR_TYPE_EXT_INTR:
kvm_queue_interrupt(&vmx->vcpu, vector,
@@ -3885,27 +3866,21 @@
}
}
-/*
- * Failure to inject an interrupt should give us the information
- * in IDT_VECTORING_INFO_FIELD. However, if the failure occurs
- * when fetching the interrupt redirection bitmap in the real-mode
- * tss, this doesn't happen. So we do it ourselves.
- */
-static void fixup_rmode_irq(struct vcpu_vmx *vmx)
+static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
{
- vmx->rmode.irq.pending = 0;
- if (kvm_rip_read(&vmx->vcpu) + 1 != vmx->rmode.irq.rip)
- return;
- kvm_rip_write(&vmx->vcpu, vmx->rmode.irq.rip);
- if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
- vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK;
- vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR;
- return;
- }
- vmx->idt_vectoring_info =
- VECTORING_INFO_VALID_MASK
- | INTR_TYPE_EXT_INTR
- | vmx->rmode.irq.vector;
+ __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info,
+ VM_EXIT_INSTRUCTION_LEN,
+ IDT_VECTORING_ERROR_CODE);
+}
+
+static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
+{
+ __vmx_complete_interrupts(to_vmx(vcpu),
+ vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
+ VM_ENTRY_INSTRUCTION_LEN,
+ VM_ENTRY_EXCEPTION_ERROR_CODE);
+
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
}
#ifdef CONFIG_X86_64
@@ -4032,7 +4007,7 @@
#endif
[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
: "cc", "memory"
- , R"bx", R"di", R"si"
+ , R"ax", R"bx", R"di", R"si"
#ifdef CONFIG_X86_64
, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
#endif
@@ -4043,12 +4018,15 @@
vcpu->arch.regs_dirty = 0;
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
- if (vmx->rmode.irq.pending)
- fixup_rmode_irq(vmx);
asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->launched = 1;
+ vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
+ vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+ vmx_complete_atomic_exit(vmx);
+ vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
}
@@ -4119,6 +4097,7 @@
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
+ vmx->vcpu.cpu = cpu;
err = vmx_vcpu_setup(vmx);
vmx_vcpu_put(&vmx->vcpu);
put_cpu();
@@ -4334,6 +4313,7 @@
.set_irq = vmx_inject_irq,
.set_nmi = vmx_inject_nmi,
.queue_exception = vmx_queue_exception,
+ .cancel_injection = vmx_cancel_injection,
.interrupt_allowed = vmx_interrupt_allowed,
.nmi_allowed = vmx_nmi_allowed,
.get_nmi_mask = vmx_get_nmi_mask,
@@ -4356,6 +4336,11 @@
.set_supported_cpuid = vmx_set_supported_cpuid,
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
+
+ .write_tsc_offset = vmx_write_tsc_offset,
+ .adjust_tsc_offset = vmx_adjust_tsc_offset,
+
+ .set_tdp_cr3 = vmx_set_cr3,
};
static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6c2ecf0..2288ad8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6,7 +6,7 @@
* Copyright (C) 2006 Qumranet, Inc.
* Copyright (C) 2008 Qumranet, Inc.
* Copyright IBM Corporation, 2008
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Avi Kivity <avi@qumranet.com>
@@ -55,6 +55,8 @@
#include <asm/mce.h>
#include <asm/i387.h>
#include <asm/xcr.h>
+#include <asm/pvclock.h>
+#include <asm/div64.h>
#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS \
@@ -71,7 +73,7 @@
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
#define KVM_MAX_MCE_BANKS 32
-#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
+#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
/* EFER defaults:
* - enable syscall per default because its emulated by KVM
@@ -282,6 +284,8 @@
u32 prev_nr;
int class1, class2;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
if (!vcpu->arch.exception.pending) {
queue:
vcpu->arch.exception.pending = true;
@@ -327,16 +331,28 @@
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
-void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
- u32 error_code)
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu)
{
+ unsigned error_code = vcpu->arch.fault.error_code;
+
++vcpu->stat.pf_guest;
- vcpu->arch.cr2 = addr;
+ vcpu->arch.cr2 = vcpu->arch.fault.address;
kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
}
+void kvm_propagate_fault(struct kvm_vcpu *vcpu)
+{
+ if (mmu_is_nested(vcpu) && !vcpu->arch.fault.nested)
+ vcpu->arch.nested_mmu.inject_page_fault(vcpu);
+ else
+ vcpu->arch.mmu.inject_page_fault(vcpu);
+
+ vcpu->arch.fault.nested = false;
+}
+
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
vcpu->arch.nmi_pending = 1;
}
EXPORT_SYMBOL_GPL(kvm_inject_nmi);
@@ -367,18 +383,49 @@
EXPORT_SYMBOL_GPL(kvm_require_cpl);
/*
+ * This function will be used to read from the physical memory of the currently
+ * running guest. The difference to kvm_read_guest_page is that this function
+ * can read from guest physical or from the guest's guest physical memory.
+ */
+int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gfn_t ngfn, void *data, int offset, int len,
+ u32 access)
+{
+ gfn_t real_gfn;
+ gpa_t ngpa;
+
+ ngpa = gfn_to_gpa(ngfn);
+ real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
+ if (real_gfn == UNMAPPED_GVA)
+ return -EFAULT;
+
+ real_gfn = gpa_to_gfn(real_gfn);
+
+ return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
+
+int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
+ void *data, int offset, int len, u32 access)
+{
+ return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
+ data, offset, len, access);
+}
+
+/*
* Load the pae pdptrs. Return true is they are all valid.
*/
-int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
+int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
{
gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
int i;
int ret;
- u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
+ u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
- ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
- offset * sizeof(u64), sizeof(pdpte));
+ ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
+ offset * sizeof(u64), sizeof(pdpte),
+ PFERR_USER_MASK|PFERR_WRITE_MASK);
if (ret < 0) {
ret = 0;
goto out;
@@ -392,7 +439,7 @@
}
ret = 1;
- memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
+ memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
__set_bit(VCPU_EXREG_PDPTR,
(unsigned long *)&vcpu->arch.regs_avail);
__set_bit(VCPU_EXREG_PDPTR,
@@ -405,8 +452,10 @@
static bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
- u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
+ u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
bool changed = true;
+ int offset;
+ gfn_t gfn;
int r;
if (is_long_mode(vcpu) || !is_pae(vcpu))
@@ -416,10 +465,13 @@
(unsigned long *)&vcpu->arch.regs_avail))
return true;
- r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
+ gfn = (vcpu->arch.cr3 & ~31u) >> PAGE_SHIFT;
+ offset = (vcpu->arch.cr3 & ~31u) & (PAGE_SIZE - 1);
+ r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
+ PFERR_USER_MASK | PFERR_WRITE_MASK);
if (r < 0)
goto out;
- changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
+ changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
out:
return changed;
@@ -458,7 +510,8 @@
return 1;
} else
#endif
- if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3))
+ if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
+ vcpu->arch.cr3))
return 1;
}
@@ -547,7 +600,7 @@
return 1;
} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
&& ((cr4 ^ old_cr4) & pdptr_bits)
- && !load_pdptrs(vcpu, vcpu->arch.cr3))
+ && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3))
return 1;
if (cr4 & X86_CR4_VMXE)
@@ -580,7 +633,8 @@
if (is_pae(vcpu)) {
if (cr3 & CR3_PAE_RESERVED_BITS)
return 1;
- if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3))
+ if (is_paging(vcpu) &&
+ !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1;
}
/*
@@ -737,7 +791,7 @@
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
- MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
+ MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};
static unsigned num_msrs_to_save;
@@ -838,7 +892,7 @@
/*
* The guest calculates current wall clock time by adding
- * system time (updated by kvm_write_guest_time below) to the
+ * system time (updated by kvm_guest_time_update below) to the
* wall clock specified here. guest system time equals host
* system time for us, thus we must fill in host boot time here.
*/
@@ -866,65 +920,229 @@
return quotient;
}
-static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock)
+static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
+ s8 *pshift, u32 *pmultiplier)
{
- uint64_t nsecs = 1000000000LL;
+ uint64_t scaled64;
int32_t shift = 0;
uint64_t tps64;
uint32_t tps32;
- tps64 = tsc_khz * 1000LL;
- while (tps64 > nsecs*2) {
+ tps64 = base_khz * 1000LL;
+ scaled64 = scaled_khz * 1000LL;
+ while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
tps64 >>= 1;
shift--;
}
tps32 = (uint32_t)tps64;
- while (tps32 <= (uint32_t)nsecs) {
- tps32 <<= 1;
+ while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
+ if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
+ scaled64 >>= 1;
+ else
+ tps32 <<= 1;
shift++;
}
- hv_clock->tsc_shift = shift;
- hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
+ *pshift = shift;
+ *pmultiplier = div_frac(scaled64, tps32);
- pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
- __func__, tsc_khz, hv_clock->tsc_shift,
- hv_clock->tsc_to_system_mul);
+ pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n",
+ __func__, base_khz, scaled_khz, shift, *pmultiplier);
+}
+
+static inline u64 get_kernel_ns(void)
+{
+ struct timespec ts;
+
+ WARN_ON(preemptible());
+ ktime_get_ts(&ts);
+ monotonic_to_bootbased(&ts);
+ return timespec_to_ns(&ts);
}
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+unsigned long max_tsc_khz;
-static void kvm_write_guest_time(struct kvm_vcpu *v)
+static inline int kvm_tsc_changes_freq(void)
{
- struct timespec ts;
+ int cpu = get_cpu();
+ int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+ cpufreq_quick_get(cpu) != 0;
+ put_cpu();
+ return ret;
+}
+
+static inline u64 nsec_to_cycles(u64 nsec)
+{
+ u64 ret;
+
+ WARN_ON(preemptible());
+ if (kvm_tsc_changes_freq())
+ printk_once(KERN_WARNING
+ "kvm: unreliable cycle conversion on adjustable rate TSC\n");
+ ret = nsec * __get_cpu_var(cpu_tsc_khz);
+ do_div(ret, USEC_PER_SEC);
+ return ret;
+}
+
+static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz)
+{
+ /* Compute a scale to convert nanoseconds in TSC cycles */
+ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
+ &kvm->arch.virtual_tsc_shift,
+ &kvm->arch.virtual_tsc_mult);
+ kvm->arch.virtual_tsc_khz = this_tsc_khz;
+}
+
+static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
+{
+ u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
+ vcpu->kvm->arch.virtual_tsc_mult,
+ vcpu->kvm->arch.virtual_tsc_shift);
+ tsc += vcpu->arch.last_tsc_write;
+ return tsc;
+}
+
+void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 offset, ns, elapsed;
+ unsigned long flags;
+ s64 sdiff;
+
+ spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
+ offset = data - native_read_tsc();
+ ns = get_kernel_ns();
+ elapsed = ns - kvm->arch.last_tsc_nsec;
+ sdiff = data - kvm->arch.last_tsc_write;
+ if (sdiff < 0)
+ sdiff = -sdiff;
+
+ /*
+ * Special case: close write to TSC within 5 seconds of
+ * another CPU is interpreted as an attempt to synchronize
+ * The 5 seconds is to accomodate host load / swapping as
+ * well as any reset of TSC during the boot process.
+ *
+ * In that case, for a reliable TSC, we can match TSC offsets,
+ * or make a best guest using elapsed value.
+ */
+ if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) &&
+ elapsed < 5ULL * NSEC_PER_SEC) {
+ if (!check_tsc_unstable()) {
+ offset = kvm->arch.last_tsc_offset;
+ pr_debug("kvm: matched tsc offset for %llu\n", data);
+ } else {
+ u64 delta = nsec_to_cycles(elapsed);
+ offset += delta;
+ pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
+ }
+ ns = kvm->arch.last_tsc_nsec;
+ }
+ kvm->arch.last_tsc_nsec = ns;
+ kvm->arch.last_tsc_write = data;
+ kvm->arch.last_tsc_offset = offset;
+ kvm_x86_ops->write_tsc_offset(vcpu, offset);
+ spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
+
+ /* Reset of TSC must disable overshoot protection below */
+ vcpu->arch.hv_clock.tsc_timestamp = 0;
+ vcpu->arch.last_tsc_write = data;
+ vcpu->arch.last_tsc_nsec = ns;
+}
+EXPORT_SYMBOL_GPL(kvm_write_tsc);
+
+static int kvm_guest_time_update(struct kvm_vcpu *v)
+{
unsigned long flags;
struct kvm_vcpu_arch *vcpu = &v->arch;
void *shared_kaddr;
unsigned long this_tsc_khz;
-
- if ((!vcpu->time_page))
- return;
-
- this_tsc_khz = get_cpu_var(cpu_tsc_khz);
- if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
- kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
- vcpu->hv_clock_tsc_khz = this_tsc_khz;
- }
- put_cpu_var(cpu_tsc_khz);
+ s64 kernel_ns, max_kernel_ns;
+ u64 tsc_timestamp;
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
- kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
- ktime_get_ts(&ts);
- monotonic_to_bootbased(&ts);
+ kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
+ kernel_ns = get_kernel_ns();
+ this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
+
+ if (unlikely(this_tsc_khz == 0)) {
+ local_irq_restore(flags);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
+ return 1;
+ }
+
+ /*
+ * We may have to catch up the TSC to match elapsed wall clock
+ * time for two reasons, even if kvmclock is used.
+ * 1) CPU could have been running below the maximum TSC rate
+ * 2) Broken TSC compensation resets the base at each VCPU
+ * entry to avoid unknown leaps of TSC even when running
+ * again on the same CPU. This may cause apparent elapsed
+ * time to disappear, and the guest to stand still or run
+ * very slowly.
+ */
+ if (vcpu->tsc_catchup) {
+ u64 tsc = compute_guest_tsc(v, kernel_ns);
+ if (tsc > tsc_timestamp) {
+ kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp);
+ tsc_timestamp = tsc;
+ }
+ }
+
local_irq_restore(flags);
+ if (!vcpu->time_page)
+ return 0;
+
+ /*
+ * Time as measured by the TSC may go backwards when resetting the base
+ * tsc_timestamp. The reason for this is that the TSC resolution is
+ * higher than the resolution of the other clock scales. Thus, many
+ * possible measurments of the TSC correspond to one measurement of any
+ * other clock, and so a spread of values is possible. This is not a
+ * problem for the computation of the nanosecond clock; with TSC rates
+ * around 1GHZ, there can only be a few cycles which correspond to one
+ * nanosecond value, and any path through this code will inevitably
+ * take longer than that. However, with the kernel_ns value itself,
+ * the precision may be much lower, down to HZ granularity. If the
+ * first sampling of TSC against kernel_ns ends in the low part of the
+ * range, and the second in the high end of the range, we can get:
+ *
+ * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
+ *
+ * As the sampling errors potentially range in the thousands of cycles,
+ * it is possible such a time value has already been observed by the
+ * guest. To protect against this, we must compute the system time as
+ * observed by the guest and ensure the new system time is greater.
+ */
+ max_kernel_ns = 0;
+ if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) {
+ max_kernel_ns = vcpu->last_guest_tsc -
+ vcpu->hv_clock.tsc_timestamp;
+ max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
+ vcpu->hv_clock.tsc_to_system_mul,
+ vcpu->hv_clock.tsc_shift);
+ max_kernel_ns += vcpu->last_kernel_ns;
+ }
+
+ if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
+ kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
+ &vcpu->hv_clock.tsc_shift,
+ &vcpu->hv_clock.tsc_to_system_mul);
+ vcpu->hw_tsc_khz = this_tsc_khz;
+ }
+
+ if (max_kernel_ns > kernel_ns)
+ kernel_ns = max_kernel_ns;
+
/* With all the info we got, fill in the values */
-
- vcpu->hv_clock.system_time = ts.tv_nsec +
- (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
-
+ vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
+ vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
+ vcpu->last_kernel_ns = kernel_ns;
+ vcpu->last_guest_tsc = tsc_timestamp;
vcpu->hv_clock.flags = 0;
/*
@@ -942,16 +1160,7 @@
kunmap_atomic(shared_kaddr, KM_USER0);
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
-}
-
-static int kvm_request_guest_time_update(struct kvm_vcpu *v)
-{
- struct kvm_vcpu_arch *vcpu = &v->arch;
-
- if (!vcpu->time_page)
- return 0;
- kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v);
- return 1;
+ return 0;
}
static bool msr_mtrr_valid(unsigned msr)
@@ -1277,6 +1486,7 @@
}
vcpu->arch.time = data;
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
/* we verify if the enable bit is set... */
if (!(data & 1))
@@ -1292,8 +1502,6 @@
kvm_release_page_clean(vcpu->arch.time_page);
vcpu->arch.time_page = NULL;
}
-
- kvm_request_guest_time_update(vcpu);
break;
}
case MSR_IA32_MCG_CTL:
@@ -1330,6 +1538,16 @@
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
"0x%x data 0x%llx\n", msr, data);
break;
+ case MSR_K7_CLK_CTL:
+ /*
+ * Ignore all writes to this no longer documented MSR.
+ * Writes are only relevant for old K7 processors,
+ * all pre-dating SVM, but a recommended workaround from
+ * AMD for these chips. It is possible to speicify the
+ * affected processor models on the command line, hence
+ * the need to ignore the workaround.
+ */
+ break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
if (kvm_hv_msr_partition_wide(msr)) {
int r;
@@ -1522,6 +1740,20 @@
case 0xcd: /* fsb frequency */
data = 3;
break;
+ /*
+ * MSR_EBC_FREQUENCY_ID
+ * Conservative value valid for even the basic CPU models.
+ * Models 0,1: 000 in bits 23:21 indicating a bus speed of
+ * 100MHz, model 2 000 in bits 18:16 indicating 100MHz,
+ * and 266MHz for model 3, or 4. Set Core Clock
+ * Frequency to System Bus Frequency Ratio to 1 (bits
+ * 31:24) even though these are only valid for CPU
+ * models > 2, however guests may end up dividing or
+ * multiplying by zero otherwise.
+ */
+ case MSR_EBC_FREQUENCY_ID:
+ data = 1 << 24;
+ break;
case MSR_IA32_APICBASE:
data = kvm_get_apic_base(vcpu);
break;
@@ -1555,6 +1787,18 @@
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
return get_msr_mce(vcpu, msr, pdata);
+ case MSR_K7_CLK_CTL:
+ /*
+ * Provide expected ramp-up count for K7. All other
+ * are set to zero, indicating minimum divisors for
+ * every field.
+ *
+ * This prevents guest kernels on AMD host with CPU
+ * type 6, model 8 and higher from exploding due to
+ * the rdmsr failing.
+ */
+ data = 0x20000000;
+ break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
if (kvm_hv_msr_partition_wide(msr)) {
int r;
@@ -1808,19 +2052,28 @@
}
kvm_x86_ops->vcpu_load(vcpu, cpu);
- if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
- unsigned long khz = cpufreq_quick_get(cpu);
- if (!khz)
- khz = tsc_khz;
- per_cpu(cpu_tsc_khz, cpu) = khz;
+ if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
+ /* Make sure TSC doesn't go backwards */
+ s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
+ native_read_tsc() - vcpu->arch.last_host_tsc;
+ if (tsc_delta < 0)
+ mark_tsc_unstable("KVM discovered backwards TSC");
+ if (check_tsc_unstable()) {
+ kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
+ vcpu->arch.tsc_catchup = 1;
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+ }
+ if (vcpu->cpu != cpu)
+ kvm_migrate_timers(vcpu);
+ vcpu->cpu = cpu;
}
- kvm_request_guest_time_update(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
+ vcpu->arch.last_host_tsc = native_read_tsc();
}
static int is_efer_nx(void)
@@ -1995,7 +2248,7 @@
F(F16C);
/* cpuid 0x80000001.ecx */
const u32 kvm_supported_word6_x86_features =
- F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
+ F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
@@ -2204,6 +2457,7 @@
return -ENXIO;
kvm_queue_interrupt(vcpu, irq->irq, false);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
}
@@ -2357,6 +2611,8 @@
if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
vcpu->arch.sipi_vector = events->sipi_vector;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
return 0;
}
@@ -2760,7 +3016,7 @@
static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
- return kvm->arch.n_alloc_mmu_pages;
+ return kvm->arch.n_max_mmu_pages;
}
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
@@ -2796,18 +3052,18 @@
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_PIC_MASTER:
- raw_spin_lock(&pic_irqchip(kvm)->lock);
+ spin_lock(&pic_irqchip(kvm)->lock);
memcpy(&pic_irqchip(kvm)->pics[0],
&chip->chip.pic,
sizeof(struct kvm_pic_state));
- raw_spin_unlock(&pic_irqchip(kvm)->lock);
+ spin_unlock(&pic_irqchip(kvm)->lock);
break;
case KVM_IRQCHIP_PIC_SLAVE:
- raw_spin_lock(&pic_irqchip(kvm)->lock);
+ spin_lock(&pic_irqchip(kvm)->lock);
memcpy(&pic_irqchip(kvm)->pics[1],
&chip->chip.pic,
sizeof(struct kvm_pic_state));
- raw_spin_unlock(&pic_irqchip(kvm)->lock);
+ spin_unlock(&pic_irqchip(kvm)->lock);
break;
case KVM_IRQCHIP_IOAPIC:
r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
@@ -3201,7 +3457,6 @@
break;
}
case KVM_SET_CLOCK: {
- struct timespec now;
struct kvm_clock_data user_ns;
u64 now_ns;
s64 delta;
@@ -3215,20 +3470,21 @@
goto out;
r = 0;
- ktime_get_ts(&now);
- now_ns = timespec_to_ns(&now);
+ local_irq_disable();
+ now_ns = get_kernel_ns();
delta = user_ns.clock - now_ns;
+ local_irq_enable();
kvm->arch.kvmclock_offset = delta;
break;
}
case KVM_GET_CLOCK: {
- struct timespec now;
struct kvm_clock_data user_ns;
u64 now_ns;
- ktime_get_ts(&now);
- now_ns = timespec_to_ns(&now);
+ local_irq_disable();
+ now_ns = get_kernel_ns();
user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
+ local_irq_enable();
user_ns.flags = 0;
r = -EFAULT;
@@ -3292,30 +3548,51 @@
kvm_x86_ops->get_segment(vcpu, var, seg);
}
+static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
+{
+ return gpa;
+}
+
+static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
+{
+ gpa_t t_gpa;
+ u32 error;
+
+ BUG_ON(!mmu_is_nested(vcpu));
+
+ /* NPT walks are always user-walks */
+ access |= PFERR_USER_MASK;
+ t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &error);
+ if (t_gpa == UNMAPPED_GVA)
+ vcpu->arch.fault.nested = true;
+
+ return t_gpa;
+}
+
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
{
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
- return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
}
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
{
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_FETCH_MASK;
- return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
}
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
{
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_WRITE_MASK;
- return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
}
/* uses this to access any guest's mapped memory without checking CPL */
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
{
- return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error);
+ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, error);
}
static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
@@ -3326,7 +3603,8 @@
int r = X86EMUL_CONTINUE;
while (bytes) {
- gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error);
+ gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
+ error);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
@@ -3381,8 +3659,9 @@
int r = X86EMUL_CONTINUE;
while (bytes) {
- gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr,
- PFERR_WRITE_MASK, error);
+ gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
+ PFERR_WRITE_MASK,
+ error);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
@@ -3624,7 +3903,7 @@
if (vcpu->arch.pio.count)
goto data_avail;
- trace_kvm_pio(1, port, size, 1);
+ trace_kvm_pio(0, port, size, 1);
vcpu->arch.pio.port = port;
vcpu->arch.pio.in = 1;
@@ -3652,7 +3931,7 @@
const void *val, unsigned int count,
struct kvm_vcpu *vcpu)
{
- trace_kvm_pio(0, port, size, 1);
+ trace_kvm_pio(1, port, size, 1);
vcpu->arch.pio.port = port;
vcpu->arch.pio.in = 0;
@@ -3791,6 +4070,11 @@
kvm_x86_ops->get_gdt(vcpu, dt);
}
+static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
+{
+ kvm_x86_ops->get_idt(vcpu, dt);
+}
+
static unsigned long emulator_get_cached_segment_base(int seg,
struct kvm_vcpu *vcpu)
{
@@ -3884,6 +4168,7 @@
.set_segment_selector = emulator_set_segment_selector,
.get_cached_segment_base = emulator_get_cached_segment_base,
.get_gdt = emulator_get_gdt,
+ .get_idt = emulator_get_idt,
.get_cr = emulator_get_cr,
.set_cr = emulator_set_cr,
.cpl = emulator_get_cpl,
@@ -3919,13 +4204,64 @@
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
if (ctxt->exception == PF_VECTOR)
- kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code);
+ kvm_propagate_fault(vcpu);
else if (ctxt->error_code_valid)
kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code);
else
kvm_queue_exception(vcpu, ctxt->exception);
}
+static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
+{
+ struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+ int cs_db, cs_l;
+
+ cache_all_regs(vcpu);
+
+ kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+
+ vcpu->arch.emulate_ctxt.vcpu = vcpu;
+ vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
+ vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
+ vcpu->arch.emulate_ctxt.mode =
+ (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
+ (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
+ ? X86EMUL_MODE_VM86 : cs_l
+ ? X86EMUL_MODE_PROT64 : cs_db
+ ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+ memset(c, 0, sizeof(struct decode_cache));
+ memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+}
+
+int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
+{
+ struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+ int ret;
+
+ init_emulate_ctxt(vcpu);
+
+ vcpu->arch.emulate_ctxt.decode.op_bytes = 2;
+ vcpu->arch.emulate_ctxt.decode.ad_bytes = 2;
+ vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip;
+ ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq);
+
+ if (ret != X86EMUL_CONTINUE)
+ return EMULATE_FAIL;
+
+ vcpu->arch.emulate_ctxt.eip = c->eip;
+ memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+ kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+ kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+
+ if (irq == NMI_VECTOR)
+ vcpu->arch.nmi_pending = false;
+ else
+ vcpu->arch.interrupt.pending = false;
+
+ return EMULATE_DONE;
+}
+EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
+
static int handle_emulation_failure(struct kvm_vcpu *vcpu)
{
++vcpu->stat.insn_emulation_fail;
@@ -3982,24 +4318,15 @@
cache_all_regs(vcpu);
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
- int cs_db, cs_l;
- kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
-
- vcpu->arch.emulate_ctxt.vcpu = vcpu;
- vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
- vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
- vcpu->arch.emulate_ctxt.mode =
- (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
- (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
- ? X86EMUL_MODE_VM86 : cs_l
- ? X86EMUL_MODE_PROT64 : cs_db
- ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
- memset(c, 0, sizeof(struct decode_cache));
- memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+ init_emulate_ctxt(vcpu);
vcpu->arch.emulate_ctxt.interruptibility = 0;
vcpu->arch.emulate_ctxt.exception = -1;
+ vcpu->arch.emulate_ctxt.perm_ok = false;
- r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
+ r = x86_decode_insn(&vcpu->arch.emulate_ctxt);
+ if (r == X86EMUL_PROPAGATE_FAULT)
+ goto done;
+
trace_kvm_emulate_insn_start(vcpu);
/* Only allow emulation of specific instructions on #UD
@@ -4049,41 +4376,39 @@
memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
restart:
- r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
+ r = x86_emulate_insn(&vcpu->arch.emulate_ctxt);
- if (r) { /* emulation failed */
+ if (r == EMULATION_FAILED) {
if (reexecute_instruction(vcpu, cr2))
return EMULATE_DONE;
return handle_emulation_failure(vcpu);
}
+done:
+ if (vcpu->arch.emulate_ctxt.exception >= 0) {
+ inject_emulated_exception(vcpu);
+ r = EMULATE_DONE;
+ } else if (vcpu->arch.pio.count) {
+ if (!vcpu->arch.pio.in)
+ vcpu->arch.pio.count = 0;
+ r = EMULATE_DO_MMIO;
+ } else if (vcpu->mmio_needed) {
+ if (vcpu->mmio_is_write)
+ vcpu->mmio_needed = 0;
+ r = EMULATE_DO_MMIO;
+ } else if (r == EMULATION_RESTART)
+ goto restart;
+ else
+ r = EMULATE_DONE;
+
toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
- if (vcpu->arch.emulate_ctxt.exception >= 0) {
- inject_emulated_exception(vcpu);
- return EMULATE_DONE;
- }
-
- if (vcpu->arch.pio.count) {
- if (!vcpu->arch.pio.in)
- vcpu->arch.pio.count = 0;
- return EMULATE_DO_MMIO;
- }
-
- if (vcpu->mmio_needed) {
- if (vcpu->mmio_is_write)
- vcpu->mmio_needed = 0;
- return EMULATE_DO_MMIO;
- }
-
- if (vcpu->arch.emulate_ctxt.restart)
- goto restart;
-
- return EMULATE_DONE;
+ return r;
}
EXPORT_SYMBOL_GPL(emulate_instruction);
@@ -4097,9 +4422,23 @@
}
EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
-static void bounce_off(void *info)
+static void tsc_bad(void *info)
{
- /* nothing */
+ __get_cpu_var(cpu_tsc_khz) = 0;
+}
+
+static void tsc_khz_changed(void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ unsigned long khz = 0;
+
+ if (data)
+ khz = freq->new;
+ else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ khz = cpufreq_quick_get(raw_smp_processor_id());
+ if (!khz)
+ khz = tsc_khz;
+ __get_cpu_var(cpu_tsc_khz) = khz;
}
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
@@ -4110,21 +4449,60 @@
struct kvm_vcpu *vcpu;
int i, send_ipi = 0;
+ /*
+ * We allow guests to temporarily run on slowing clocks,
+ * provided we notify them after, or to run on accelerating
+ * clocks, provided we notify them before. Thus time never
+ * goes backwards.
+ *
+ * However, we have a problem. We can't atomically update
+ * the frequency of a given CPU from this function; it is
+ * merely a notifier, which can be called from any CPU.
+ * Changing the TSC frequency at arbitrary points in time
+ * requires a recomputation of local variables related to
+ * the TSC for each VCPU. We must flag these local variables
+ * to be updated and be sure the update takes place with the
+ * new frequency before any guests proceed.
+ *
+ * Unfortunately, the combination of hotplug CPU and frequency
+ * change creates an intractable locking scenario; the order
+ * of when these callouts happen is undefined with respect to
+ * CPU hotplug, and they can race with each other. As such,
+ * merely setting per_cpu(cpu_tsc_khz) = X during a hotadd is
+ * undefined; you can actually have a CPU frequency change take
+ * place in between the computation of X and the setting of the
+ * variable. To protect against this problem, all updates of
+ * the per_cpu tsc_khz variable are done in an interrupt
+ * protected IPI, and all callers wishing to update the value
+ * must wait for a synchronous IPI to complete (which is trivial
+ * if the caller is on the CPU already). This establishes the
+ * necessary total order on variable updates.
+ *
+ * Note that because a guest time update may take place
+ * anytime after the setting of the VCPU's request bit, the
+ * correct TSC value must be set before the request. However,
+ * to ensure the update actually makes it to any guest which
+ * starts running in hardware virtualization between the set
+ * and the acquisition of the spinlock, we must also ping the
+ * CPU after setting the request bit.
+ *
+ */
+
if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
return 0;
if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
return 0;
- per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
+
+ smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != freq->cpu)
continue;
- if (!kvm_request_guest_time_update(vcpu))
- continue;
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
if (vcpu->cpu != smp_processor_id())
- send_ipi++;
+ send_ipi = 1;
}
}
spin_unlock(&kvm_lock);
@@ -4142,32 +4520,57 @@
* guest context is entered kvmclock will be updated,
* so the guest will not see stale values.
*/
- smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+ smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
}
return 0;
}
static struct notifier_block kvmclock_cpufreq_notifier_block = {
- .notifier_call = kvmclock_cpufreq_notifier
+ .notifier_call = kvmclock_cpufreq_notifier
+};
+
+static int kvmclock_cpu_notifier(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_DOWN_FAILED:
+ smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
+ break;
+ case CPU_DOWN_PREPARE:
+ smp_call_function_single(cpu, tsc_bad, NULL, 1);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block kvmclock_cpu_notifier_block = {
+ .notifier_call = kvmclock_cpu_notifier,
+ .priority = -INT_MAX
};
static void kvm_timer_init(void)
{
int cpu;
+ max_tsc_khz = tsc_khz;
+ register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+#ifdef CONFIG_CPU_FREQ
+ struct cpufreq_policy policy;
+ memset(&policy, 0, sizeof(policy));
+ cpufreq_get_policy(&policy, get_cpu());
+ if (policy.cpuinfo.max_freq)
+ max_tsc_khz = policy.cpuinfo.max_freq;
+#endif
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
- for_each_online_cpu(cpu) {
- unsigned long khz = cpufreq_get(cpu);
- if (!khz)
- khz = tsc_khz;
- per_cpu(cpu_tsc_khz, cpu) = khz;
- }
- } else {
- for_each_possible_cpu(cpu)
- per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
}
+ pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
+ for_each_online_cpu(cpu)
+ smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
}
static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@ -4269,6 +4672,7 @@
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
+ unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
}
@@ -4684,8 +5088,11 @@
kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
__kvm_migrate_timers(vcpu);
- if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu))
- kvm_write_guest_time(vcpu);
+ if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
+ r = kvm_guest_time_update(vcpu);
+ if (unlikely(r))
+ goto out;
+ }
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
@@ -4710,6 +5117,21 @@
if (unlikely(r))
goto out;
+ if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
+ inject_pending_event(vcpu);
+
+ /* enable NMI/IRQ window open exits if needed */
+ if (vcpu->arch.nmi_pending)
+ kvm_x86_ops->enable_nmi_window(vcpu);
+ else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+ kvm_x86_ops->enable_irq_window(vcpu);
+
+ if (kvm_lapic_enabled(vcpu)) {
+ update_cr8_intercept(vcpu);
+ kvm_lapic_sync_to_vapic(vcpu);
+ }
+ }
+
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
@@ -4728,23 +5150,11 @@
smp_wmb();
local_irq_enable();
preempt_enable();
+ kvm_x86_ops->cancel_injection(vcpu);
r = 1;
goto out;
}
- inject_pending_event(vcpu);
-
- /* enable NMI/IRQ window open exits if needed */
- if (vcpu->arch.nmi_pending)
- kvm_x86_ops->enable_nmi_window(vcpu);
- else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
- kvm_x86_ops->enable_irq_window(vcpu);
-
- if (kvm_lapic_enabled(vcpu)) {
- update_cr8_intercept(vcpu);
- kvm_lapic_sync_to_vapic(vcpu);
- }
-
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
kvm_guest_enter();
@@ -4770,6 +5180,8 @@
if (hw_breakpoint_active())
hw_breakpoint_restore();
+ kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
+
atomic_set(&vcpu->guest_mode, 0);
smp_wmb();
local_irq_enable();
@@ -4899,8 +5311,7 @@
if (!irqchip_in_kernel(vcpu->kvm))
kvm_set_cr8(vcpu, kvm_run->cr8);
- if (vcpu->arch.pio.count || vcpu->mmio_needed ||
- vcpu->arch.emulate_ctxt.restart) {
+ if (vcpu->arch.pio.count || vcpu->mmio_needed) {
if (vcpu->mmio_needed) {
memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
vcpu->mmio_read_completed = 1;
@@ -4981,6 +5392,8 @@
vcpu->arch.exception.pending = false;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
return 0;
}
@@ -5044,6 +5457,7 @@
struct kvm_mp_state *mp_state)
{
vcpu->arch.mp_state = mp_state->mp_state;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
}
@@ -5051,24 +5465,11 @@
bool has_error_code, u32 error_code)
{
struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
- int cs_db, cs_l, ret;
- cache_all_regs(vcpu);
+ int ret;
- kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+ init_emulate_ctxt(vcpu);
- vcpu->arch.emulate_ctxt.vcpu = vcpu;
- vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
- vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
- vcpu->arch.emulate_ctxt.mode =
- (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
- (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
- ? X86EMUL_MODE_VM86 : cs_l
- ? X86EMUL_MODE_PROT64 : cs_db
- ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
- memset(c, 0, sizeof(struct decode_cache));
- memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
-
- ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops,
+ ret = emulator_task_switch(&vcpu->arch.emulate_ctxt,
tss_selector, reason, has_error_code,
error_code);
@@ -5078,6 +5479,7 @@
memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
@@ -5113,7 +5515,7 @@
mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
if (!is_long_mode(vcpu) && is_pae(vcpu)) {
- load_pdptrs(vcpu, vcpu->arch.cr3);
+ load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
mmu_reset_needed = 1;
}
@@ -5148,6 +5550,8 @@
!is_protmode(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
return 0;
}
@@ -5334,6 +5738,10 @@
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
+ if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
+ printk_once(KERN_WARNING
+ "kvm: SMP vm created on host with unstable TSC; "
+ "guest TSC will not be reliable\n");
return kvm_x86_ops->vcpu_create(kvm, id);
}
@@ -5376,22 +5784,22 @@
vcpu->arch.dr6 = DR6_FIXED_1;
vcpu->arch.dr7 = DR7_FIXED_1;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
return kvm_x86_ops->vcpu_reset(vcpu);
}
int kvm_arch_hardware_enable(void *garbage)
{
- /*
- * Since this may be called from a hotplug notifcation,
- * we can't get the CPU frequency directly.
- */
- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
- int cpu = raw_smp_processor_id();
- per_cpu(cpu_tsc_khz, cpu) = 0;
- }
+ struct kvm *kvm;
+ struct kvm_vcpu *vcpu;
+ int i;
kvm_shared_msr_cpu_online();
-
+ list_for_each_entry(kvm, &vm_list, vm_list)
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ if (vcpu->cpu == smp_processor_id())
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
return kvm_x86_ops->hardware_enable(garbage);
}
@@ -5425,7 +5833,11 @@
BUG_ON(vcpu->kvm == NULL);
kvm = vcpu->kvm;
+ vcpu->arch.emulate_ctxt.ops = &emulate_ops;
+ vcpu->arch.walk_mmu = &vcpu->arch.mmu;
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+ vcpu->arch.mmu.translate_gpa = translate_gpa;
+ vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
@@ -5438,6 +5850,9 @@
}
vcpu->arch.pio_data = page_address(page);
+ if (!kvm->arch.virtual_tsc_khz)
+ kvm_arch_set_tsc_khz(kvm, max_tsc_khz);
+
r = kvm_mmu_create(vcpu);
if (r < 0)
goto fail_free_pio_data;
@@ -5497,7 +5912,7 @@
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
- rdtscll(kvm->arch.vm_init_tsc);
+ spin_lock_init(&kvm->arch.tsc_write_lock);
return kvm;
}
@@ -5684,6 +6099,7 @@
kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
rflags |= X86_EFLAGS_TF;
kvm_x86_ops->set_rflags(vcpu, rflags);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_rflags);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b7a4047..2cea414 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -50,6 +50,11 @@
#endif
}
+static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
+}
+
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
@@ -67,5 +72,8 @@
void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
+int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
+
+void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);
#endif
diff --git a/crypto/Kconfig b/crypto/Kconfig
index e573077..e4bac29 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -23,13 +23,12 @@
config CRYPTO_FIPS
bool "FIPS 200 compliance"
- depends on CRYPTO_ANSI_CPRNG
+ depends on CRYPTO_ANSI_CPRNG && !CRYPTO_MANAGER_DISABLE_TESTS
help
This options enables the fips boot option which is
required if you want to system to operate in a FIPS 200
certification. You should say no unless you know what
- this is. Note that CRYPTO_ANSI_CPRNG is required if this
- option is selected
+ this is.
config CRYPTO_ALGAPI
tristate
@@ -365,7 +364,7 @@
RIPEMD-160 should be used.
Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel.
- See <http://home.esat.kuleuven.be/~bosselae/ripemd160.html>
+ See <http://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
config CRYPTO_RMD160
tristate "RIPEMD-160 digest algorithm"
@@ -382,7 +381,7 @@
against RIPEMD-160.
Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel.
- See <http://home.esat.kuleuven.be/~bosselae/ripemd160.html>
+ See <http://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
config CRYPTO_RMD256
tristate "RIPEMD-256 digest algorithm"
@@ -394,7 +393,7 @@
(than RIPEMD-128).
Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel.
- See <http://home.esat.kuleuven.be/~bosselae/ripemd160.html>
+ See <http://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
config CRYPTO_RMD320
tristate "RIPEMD-320 digest algorithm"
@@ -406,7 +405,7 @@
(than RIPEMD-160).
Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel.
- See <http://home.esat.kuleuven.be/~bosselae/ripemd160.html>
+ See <http://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
config CRYPTO_SHA1
tristate "SHA1 digest algorithm"
@@ -461,7 +460,7 @@
Whirlpool will be part of the ISO/IEC 10118-3:2003(E) standard
See also:
- <http://planeta.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html>
+ <http://www.larc.usp.br/~pbarreto/WhirlpoolPage.html>
config CRYPTO_GHASH_CLMUL_NI_INTEL
tristate "GHASH digest algorithm (CLMUL-NI accelerated)"
@@ -579,8 +578,8 @@
in the NESSIE competition.
See also:
- <https://www.cosic.esat.kuleuven.ac.be/nessie/reports/>
- <http://planeta.terra.com.br/informatica/paulobarreto/AnubisPage.html>
+ <https://www.cosic.esat.kuleuven.be/nessie/reports/>
+ <http://www.larc.usp.br/~pbarreto/AnubisPage.html>
config CRYPTO_ARC4
tristate "ARC4 cipher algorithm"
@@ -659,7 +658,7 @@
on 32-bit processors. Khazad uses an 128 bit key size.
See also:
- <http://planeta.terra.com.br/informatica/paulobarreto/KhazadPage.html>
+ <http://www.larc.usp.br/~pbarreto/KhazadPage.html>
config CRYPTO_SALSA20
tristate "Salsa20 stream cipher algorithm (EXPERIMENTAL)"
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index ef71318..e46d21a 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -3,6 +3,13 @@
*
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
*
+ * Added AEAD support to cryptd.
+ * Authors: Tadeusz Struk (tadeusz.struk@intel.com)
+ * Adrian Hoban <adrian.hoban@intel.com>
+ * Gabriele Paoloni <gabriele.paoloni@intel.com>
+ * Aidan O'Mahony (aidan.o.mahony@intel.com)
+ * Copyright (c) 2010, Intel Corporation.
+ *
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
@@ -12,6 +19,7 @@
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/aead.h>
#include <crypto/cryptd.h>
#include <crypto/crypto_wq.h>
#include <linux/err.h>
@@ -44,6 +52,11 @@
struct cryptd_queue *queue;
};
+struct aead_instance_ctx {
+ struct crypto_aead_spawn aead_spawn;
+ struct cryptd_queue *queue;
+};
+
struct cryptd_blkcipher_ctx {
struct crypto_blkcipher *child;
};
@@ -61,6 +74,14 @@
struct shash_desc desc;
};
+struct cryptd_aead_ctx {
+ struct crypto_aead *child;
+};
+
+struct cryptd_aead_request_ctx {
+ crypto_completion_t complete;
+};
+
static void cryptd_queue_worker(struct work_struct *work);
static int cryptd_init_queue(struct cryptd_queue *queue,
@@ -601,6 +622,144 @@
return err;
}
+static void cryptd_aead_crypt(struct aead_request *req,
+ struct crypto_aead *child,
+ int err,
+ int (*crypt)(struct aead_request *req))
+{
+ struct cryptd_aead_request_ctx *rctx;
+ rctx = aead_request_ctx(req);
+
+ if (unlikely(err == -EINPROGRESS))
+ goto out;
+ aead_request_set_tfm(req, child);
+ err = crypt( req );
+ req->base.complete = rctx->complete;
+out:
+ local_bh_disable();
+ rctx->complete(&req->base, err);
+ local_bh_enable();
+}
+
+static void cryptd_aead_encrypt(struct crypto_async_request *areq, int err)
+{
+ struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(areq->tfm);
+ struct crypto_aead *child = ctx->child;
+ struct aead_request *req;
+
+ req = container_of(areq, struct aead_request, base);
+ cryptd_aead_crypt(req, child, err, crypto_aead_crt(child)->encrypt);
+}
+
+static void cryptd_aead_decrypt(struct crypto_async_request *areq, int err)
+{
+ struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(areq->tfm);
+ struct crypto_aead *child = ctx->child;
+ struct aead_request *req;
+
+ req = container_of(areq, struct aead_request, base);
+ cryptd_aead_crypt(req, child, err, crypto_aead_crt(child)->decrypt);
+}
+
+static int cryptd_aead_enqueue(struct aead_request *req,
+ crypto_completion_t complete)
+{
+ struct cryptd_aead_request_ctx *rctx = aead_request_ctx(req);
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct cryptd_queue *queue = cryptd_get_queue(crypto_aead_tfm(tfm));
+
+ rctx->complete = req->base.complete;
+ req->base.complete = complete;
+ return cryptd_enqueue_request(queue, &req->base);
+}
+
+static int cryptd_aead_encrypt_enqueue(struct aead_request *req)
+{
+ return cryptd_aead_enqueue(req, cryptd_aead_encrypt );
+}
+
+static int cryptd_aead_decrypt_enqueue(struct aead_request *req)
+{
+ return cryptd_aead_enqueue(req, cryptd_aead_decrypt );
+}
+
+static int cryptd_aead_init_tfm(struct crypto_tfm *tfm)
+{
+ struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
+ struct aead_instance_ctx *ictx = crypto_instance_ctx(inst);
+ struct crypto_aead_spawn *spawn = &ictx->aead_spawn;
+ struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct crypto_aead *cipher;
+
+ cipher = crypto_spawn_aead(spawn);
+ if (IS_ERR(cipher))
+ return PTR_ERR(cipher);
+
+ crypto_aead_set_flags(cipher, CRYPTO_TFM_REQ_MAY_SLEEP);
+ ctx->child = cipher;
+ tfm->crt_aead.reqsize = sizeof(struct cryptd_aead_request_ctx);
+ return 0;
+}
+
+static void cryptd_aead_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+ crypto_free_aead(ctx->child);
+}
+
+static int cryptd_create_aead(struct crypto_template *tmpl,
+ struct rtattr **tb,
+ struct cryptd_queue *queue)
+{
+ struct aead_instance_ctx *ctx;
+ struct crypto_instance *inst;
+ struct crypto_alg *alg;
+ int err;
+
+ alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_AEAD,
+ CRYPTO_ALG_TYPE_MASK);
+ if (IS_ERR(alg))
+ return PTR_ERR(alg);
+
+ inst = cryptd_alloc_instance(alg, 0, sizeof(*ctx));
+ err = PTR_ERR(inst);
+ if (IS_ERR(inst))
+ goto out_put_alg;
+
+ ctx = crypto_instance_ctx(inst);
+ ctx->queue = queue;
+
+ err = crypto_init_spawn(&ctx->aead_spawn.base, alg, inst,
+ CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC);
+ if (err)
+ goto out_free_inst;
+
+ inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
+ inst->alg.cra_type = alg->cra_type;
+ inst->alg.cra_ctxsize = sizeof(struct cryptd_aead_ctx);
+ inst->alg.cra_init = cryptd_aead_init_tfm;
+ inst->alg.cra_exit = cryptd_aead_exit_tfm;
+ inst->alg.cra_aead.setkey = alg->cra_aead.setkey;
+ inst->alg.cra_aead.setauthsize = alg->cra_aead.setauthsize;
+ inst->alg.cra_aead.geniv = alg->cra_aead.geniv;
+ inst->alg.cra_aead.ivsize = alg->cra_aead.ivsize;
+ inst->alg.cra_aead.maxauthsize = alg->cra_aead.maxauthsize;
+ inst->alg.cra_aead.encrypt = cryptd_aead_encrypt_enqueue;
+ inst->alg.cra_aead.decrypt = cryptd_aead_decrypt_enqueue;
+ inst->alg.cra_aead.givencrypt = alg->cra_aead.givencrypt;
+ inst->alg.cra_aead.givdecrypt = alg->cra_aead.givdecrypt;
+
+ err = crypto_register_instance(tmpl, inst);
+ if (err) {
+ crypto_drop_spawn(&ctx->aead_spawn.base);
+out_free_inst:
+ kfree(inst);
+ }
+out_put_alg:
+ crypto_mod_put(alg);
+ return err;
+}
+
static struct cryptd_queue queue;
static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
@@ -616,6 +775,8 @@
return cryptd_create_blkcipher(tmpl, tb, &queue);
case CRYPTO_ALG_TYPE_DIGEST:
return cryptd_create_hash(tmpl, tb, &queue);
+ case CRYPTO_ALG_TYPE_AEAD:
+ return cryptd_create_aead(tmpl, tb, &queue);
}
return -EINVAL;
@@ -625,16 +786,21 @@
{
struct cryptd_instance_ctx *ctx = crypto_instance_ctx(inst);
struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst);
+ struct aead_instance_ctx *aead_ctx = crypto_instance_ctx(inst);
switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) {
case CRYPTO_ALG_TYPE_AHASH:
crypto_drop_shash(&hctx->spawn);
kfree(ahash_instance(inst));
return;
+ case CRYPTO_ALG_TYPE_AEAD:
+ crypto_drop_spawn(&aead_ctx->aead_spawn.base);
+ kfree(inst);
+ return;
+ default:
+ crypto_drop_spawn(&ctx->spawn);
+ kfree(inst);
}
-
- crypto_drop_spawn(&ctx->spawn);
- kfree(inst);
}
static struct crypto_template cryptd_tmpl = {
@@ -724,6 +890,40 @@
}
EXPORT_SYMBOL_GPL(cryptd_free_ahash);
+struct cryptd_aead *cryptd_alloc_aead(const char *alg_name,
+ u32 type, u32 mask)
+{
+ char cryptd_alg_name[CRYPTO_MAX_ALG_NAME];
+ struct crypto_aead *tfm;
+
+ if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
+ "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
+ return ERR_PTR(-EINVAL);
+ tfm = crypto_alloc_aead(cryptd_alg_name, type, mask);
+ if (IS_ERR(tfm))
+ return ERR_CAST(tfm);
+ if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
+ crypto_free_aead(tfm);
+ return ERR_PTR(-EINVAL);
+ }
+ return __cryptd_aead_cast(tfm);
+}
+EXPORT_SYMBOL_GPL(cryptd_alloc_aead);
+
+struct crypto_aead *cryptd_aead_child(struct cryptd_aead *tfm)
+{
+ struct cryptd_aead_ctx *ctx;
+ ctx = crypto_aead_ctx(&tfm->base);
+ return ctx->child;
+}
+EXPORT_SYMBOL_GPL(cryptd_aead_child);
+
+void cryptd_free_aead(struct cryptd_aead *tfm)
+{
+ crypto_free_aead(&tfm->base);
+}
+EXPORT_SYMBOL_GPL(cryptd_free_aead);
+
static int __init cryptd_init(void)
{
int err;
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index ea0b386..eab2cf7 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -172,6 +172,7 @@
config CRYPTO_DEV_NIAGARA2
tristate "Niagara2 Stream Processing Unit driver"
+ select CRYPTO_DES
select CRYPTO_ALGAPI
depends on SPARC64
help
@@ -243,4 +244,12 @@
OMAP processors have SHA1/MD5 hw accelerator. Select this if you
want to use the OMAP module for SHA1/MD5 algorithms.
+config CRYPTO_DEV_OMAP_AES
+ tristate "Support for OMAP AES hw engine"
+ depends on ARCH_OMAP2 || ARCH_OMAP3
+ select CRYPTO_AES
+ help
+ OMAP processors have AES module accelerator. Select this if you
+ want to use the OMAP module for AES algorithms.
+
endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 6dbbe00..25669733 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -2,11 +2,12 @@
obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
-n2_crypto-objs := n2_core.o n2_asm.o
+n2_crypto-y := n2_core.o n2_asm.o
obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/
obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
+obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
diff --git a/drivers/crypto/amcc/Makefile b/drivers/crypto/amcc/Makefile
index aa376e8..5c0c62b 100644
--- a/drivers/crypto/amcc/Makefile
+++ b/drivers/crypto/amcc/Makefile
@@ -1,2 +1,2 @@
obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += crypto4xx.o
-crypto4xx-objs := crypto4xx_core.o crypto4xx_alg.o crypto4xx_sa.o
+crypto4xx-y := crypto4xx_core.o crypto4xx_alg.o crypto4xx_sa.o
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index e449ac5..0eac3da 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -2700,8 +2700,7 @@
dev = pci_get_drvdata(pdev);
if (dev) {
- cancel_delayed_work(&dev->work);
- flush_scheduled_work();
+ cancel_delayed_work_sync(&dev->work);
hifn_unregister_rng(dev);
hifn_unregister_alg(dev);
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
new file mode 100644
index 0000000..799ca51
--- /dev/null
+++ b/drivers/crypto/omap-aes.c
@@ -0,0 +1,948 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for OMAP AES HW acceleration.
+ *
+ * Copyright (c) 2010 Nokia Corporation
+ * Author: Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/crypto.h>
+#include <linux/interrupt.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/aes.h>
+
+#include <plat/cpu.h>
+#include <plat/dma.h>
+
+/* OMAP TRM gives bitfields as start:end, where start is the higher bit
+ number. For example 7:0 */
+#define FLD_MASK(start, end) (((1 << ((start) - (end) + 1)) - 1) << (end))
+#define FLD_VAL(val, start, end) (((val) << (end)) & FLD_MASK(start, end))
+
+#define AES_REG_KEY(x) (0x1C - ((x ^ 0x01) * 0x04))
+#define AES_REG_IV(x) (0x20 + ((x) * 0x04))
+
+#define AES_REG_CTRL 0x30
+#define AES_REG_CTRL_CTR_WIDTH (1 << 7)
+#define AES_REG_CTRL_CTR (1 << 6)
+#define AES_REG_CTRL_CBC (1 << 5)
+#define AES_REG_CTRL_KEY_SIZE (3 << 3)
+#define AES_REG_CTRL_DIRECTION (1 << 2)
+#define AES_REG_CTRL_INPUT_READY (1 << 1)
+#define AES_REG_CTRL_OUTPUT_READY (1 << 0)
+
+#define AES_REG_DATA 0x34
+#define AES_REG_DATA_N(x) (0x34 + ((x) * 0x04))
+
+#define AES_REG_REV 0x44
+#define AES_REG_REV_MAJOR 0xF0
+#define AES_REG_REV_MINOR 0x0F
+
+#define AES_REG_MASK 0x48
+#define AES_REG_MASK_SIDLE (1 << 6)
+#define AES_REG_MASK_START (1 << 5)
+#define AES_REG_MASK_DMA_OUT_EN (1 << 3)
+#define AES_REG_MASK_DMA_IN_EN (1 << 2)
+#define AES_REG_MASK_SOFTRESET (1 << 1)
+#define AES_REG_AUTOIDLE (1 << 0)
+
+#define AES_REG_SYSSTATUS 0x4C
+#define AES_REG_SYSSTATUS_RESETDONE (1 << 0)
+
+#define DEFAULT_TIMEOUT (5*HZ)
+
+#define FLAGS_MODE_MASK 0x000f
+#define FLAGS_ENCRYPT BIT(0)
+#define FLAGS_CBC BIT(1)
+#define FLAGS_GIV BIT(2)
+
+#define FLAGS_NEW_KEY BIT(4)
+#define FLAGS_NEW_IV BIT(5)
+#define FLAGS_INIT BIT(6)
+#define FLAGS_FAST BIT(7)
+#define FLAGS_BUSY 8
+
+struct omap_aes_ctx {
+ struct omap_aes_dev *dd;
+
+ int keylen;
+ u32 key[AES_KEYSIZE_256 / sizeof(u32)];
+ unsigned long flags;
+};
+
+struct omap_aes_reqctx {
+ unsigned long mode;
+};
+
+#define OMAP_AES_QUEUE_LENGTH 1
+#define OMAP_AES_CACHE_SIZE 0
+
+struct omap_aes_dev {
+ struct list_head list;
+ unsigned long phys_base;
+ void __iomem *io_base;
+ struct clk *iclk;
+ struct omap_aes_ctx *ctx;
+ struct device *dev;
+ unsigned long flags;
+
+ u32 *iv;
+ u32 ctrl;
+
+ spinlock_t lock;
+ struct crypto_queue queue;
+
+ struct tasklet_struct task;
+
+ struct ablkcipher_request *req;
+ size_t total;
+ struct scatterlist *in_sg;
+ size_t in_offset;
+ struct scatterlist *out_sg;
+ size_t out_offset;
+
+ size_t buflen;
+ void *buf_in;
+ size_t dma_size;
+ int dma_in;
+ int dma_lch_in;
+ dma_addr_t dma_addr_in;
+ void *buf_out;
+ int dma_out;
+ int dma_lch_out;
+ dma_addr_t dma_addr_out;
+};
+
+/* keep registered devices data here */
+static LIST_HEAD(dev_list);
+static DEFINE_SPINLOCK(list_lock);
+
+static inline u32 omap_aes_read(struct omap_aes_dev *dd, u32 offset)
+{
+ return __raw_readl(dd->io_base + offset);
+}
+
+static inline void omap_aes_write(struct omap_aes_dev *dd, u32 offset,
+ u32 value)
+{
+ __raw_writel(value, dd->io_base + offset);
+}
+
+static inline void omap_aes_write_mask(struct omap_aes_dev *dd, u32 offset,
+ u32 value, u32 mask)
+{
+ u32 val;
+
+ val = omap_aes_read(dd, offset);
+ val &= ~mask;
+ val |= value;
+ omap_aes_write(dd, offset, val);
+}
+
+static void omap_aes_write_n(struct omap_aes_dev *dd, u32 offset,
+ u32 *value, int count)
+{
+ for (; count--; value++, offset += 4)
+ omap_aes_write(dd, offset, *value);
+}
+
+static int omap_aes_wait(struct omap_aes_dev *dd, u32 offset, u32 bit)
+{
+ unsigned long timeout = jiffies + DEFAULT_TIMEOUT;
+
+ while (!(omap_aes_read(dd, offset) & bit)) {
+ if (time_is_before_jiffies(timeout)) {
+ dev_err(dd->dev, "omap-aes timeout\n");
+ return -ETIMEDOUT;
+ }
+ }
+ return 0;
+}
+
+static int omap_aes_hw_init(struct omap_aes_dev *dd)
+{
+ int err = 0;
+
+ clk_enable(dd->iclk);
+ if (!(dd->flags & FLAGS_INIT)) {
+ /* is it necessary to reset before every operation? */
+ omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_SOFTRESET,
+ AES_REG_MASK_SOFTRESET);
+ /*
+ * prevent OCP bus error (SRESP) in case an access to the module
+ * is performed while the module is coming out of soft reset
+ */
+ __asm__ __volatile__("nop");
+ __asm__ __volatile__("nop");
+
+ err = omap_aes_wait(dd, AES_REG_SYSSTATUS,
+ AES_REG_SYSSTATUS_RESETDONE);
+ if (!err)
+ dd->flags |= FLAGS_INIT;
+ }
+
+ return err;
+}
+
+static void omap_aes_hw_cleanup(struct omap_aes_dev *dd)
+{
+ clk_disable(dd->iclk);
+}
+
+static void omap_aes_write_ctrl(struct omap_aes_dev *dd)
+{
+ unsigned int key32;
+ int i;
+ u32 val, mask;
+
+ val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3);
+ if (dd->flags & FLAGS_CBC)
+ val |= AES_REG_CTRL_CBC;
+ if (dd->flags & FLAGS_ENCRYPT)
+ val |= AES_REG_CTRL_DIRECTION;
+
+ if (dd->ctrl == val && !(dd->flags & FLAGS_NEW_IV) &&
+ !(dd->ctx->flags & FLAGS_NEW_KEY))
+ goto out;
+
+ /* only need to write control registers for new settings */
+
+ dd->ctrl = val;
+
+ val = 0;
+ if (dd->dma_lch_out >= 0)
+ val |= AES_REG_MASK_DMA_OUT_EN;
+ if (dd->dma_lch_in >= 0)
+ val |= AES_REG_MASK_DMA_IN_EN;
+
+ mask = AES_REG_MASK_DMA_IN_EN | AES_REG_MASK_DMA_OUT_EN;
+
+ omap_aes_write_mask(dd, AES_REG_MASK, val, mask);
+
+ pr_debug("Set key\n");
+ key32 = dd->ctx->keylen / sizeof(u32);
+ /* set a key */
+ for (i = 0; i < key32; i++) {
+ omap_aes_write(dd, AES_REG_KEY(i),
+ __le32_to_cpu(dd->ctx->key[i]));
+ }
+ dd->ctx->flags &= ~FLAGS_NEW_KEY;
+
+ if (dd->flags & FLAGS_NEW_IV) {
+ pr_debug("Set IV\n");
+ omap_aes_write_n(dd, AES_REG_IV(0), dd->iv, 4);
+ dd->flags &= ~FLAGS_NEW_IV;
+ }
+
+ mask = AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION |
+ AES_REG_CTRL_KEY_SIZE;
+
+ omap_aes_write_mask(dd, AES_REG_CTRL, dd->ctrl, mask);
+
+out:
+ /* start DMA or disable idle mode */
+ omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START,
+ AES_REG_MASK_START);
+}
+
+static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
+{
+ struct omap_aes_dev *dd = NULL, *tmp;
+
+ spin_lock_bh(&list_lock);
+ if (!ctx->dd) {
+ list_for_each_entry(tmp, &dev_list, list) {
+ /* FIXME: take fist available aes core */
+ dd = tmp;
+ break;
+ }
+ ctx->dd = dd;
+ } else {
+ /* already found before */
+ dd = ctx->dd;
+ }
+ spin_unlock_bh(&list_lock);
+
+ return dd;
+}
+
+static void omap_aes_dma_callback(int lch, u16 ch_status, void *data)
+{
+ struct omap_aes_dev *dd = data;
+
+ if (lch == dd->dma_lch_out)
+ tasklet_schedule(&dd->task);
+}
+
+static int omap_aes_dma_init(struct omap_aes_dev *dd)
+{
+ int err = -ENOMEM;
+
+ dd->dma_lch_out = -1;
+ dd->dma_lch_in = -1;
+
+ dd->buf_in = (void *)__get_free_pages(GFP_KERNEL, OMAP_AES_CACHE_SIZE);
+ dd->buf_out = (void *)__get_free_pages(GFP_KERNEL, OMAP_AES_CACHE_SIZE);
+ dd->buflen = PAGE_SIZE << OMAP_AES_CACHE_SIZE;
+ dd->buflen &= ~(AES_BLOCK_SIZE - 1);
+
+ if (!dd->buf_in || !dd->buf_out) {
+ dev_err(dd->dev, "unable to alloc pages.\n");
+ goto err_alloc;
+ }
+
+ /* MAP here */
+ dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in, dd->buflen,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dd->dev, dd->dma_addr_in)) {
+ dev_err(dd->dev, "dma %d bytes error\n", dd->buflen);
+ err = -EINVAL;
+ goto err_map_in;
+ }
+
+ dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out, dd->buflen,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(dd->dev, dd->dma_addr_out)) {
+ dev_err(dd->dev, "dma %d bytes error\n", dd->buflen);
+ err = -EINVAL;
+ goto err_map_out;
+ }
+
+ err = omap_request_dma(dd->dma_in, "omap-aes-rx",
+ omap_aes_dma_callback, dd, &dd->dma_lch_in);
+ if (err) {
+ dev_err(dd->dev, "Unable to request DMA channel\n");
+ goto err_dma_in;
+ }
+ err = omap_request_dma(dd->dma_out, "omap-aes-tx",
+ omap_aes_dma_callback, dd, &dd->dma_lch_out);
+ if (err) {
+ dev_err(dd->dev, "Unable to request DMA channel\n");
+ goto err_dma_out;
+ }
+
+ omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT,
+ dd->phys_base + AES_REG_DATA, 0, 4);
+
+ omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
+ omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
+
+ omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT,
+ dd->phys_base + AES_REG_DATA, 0, 4);
+
+ omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
+ omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
+
+ return 0;
+
+err_dma_out:
+ omap_free_dma(dd->dma_lch_in);
+err_dma_in:
+ dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen,
+ DMA_FROM_DEVICE);
+err_map_out:
+ dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen, DMA_TO_DEVICE);
+err_map_in:
+ free_pages((unsigned long)dd->buf_out, OMAP_AES_CACHE_SIZE);
+ free_pages((unsigned long)dd->buf_in, OMAP_AES_CACHE_SIZE);
+err_alloc:
+ if (err)
+ pr_err("error: %d\n", err);
+ return err;
+}
+
+static void omap_aes_dma_cleanup(struct omap_aes_dev *dd)
+{
+ omap_free_dma(dd->dma_lch_out);
+ omap_free_dma(dd->dma_lch_in);
+ dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen,
+ DMA_FROM_DEVICE);
+ dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen, DMA_TO_DEVICE);
+ free_pages((unsigned long)dd->buf_out, OMAP_AES_CACHE_SIZE);
+ free_pages((unsigned long)dd->buf_in, OMAP_AES_CACHE_SIZE);
+}
+
+static void sg_copy_buf(void *buf, struct scatterlist *sg,
+ unsigned int start, unsigned int nbytes, int out)
+{
+ struct scatter_walk walk;
+
+ if (!nbytes)
+ return;
+
+ scatterwalk_start(&walk, sg);
+ scatterwalk_advance(&walk, start);
+ scatterwalk_copychunks(buf, &walk, nbytes, out);
+ scatterwalk_done(&walk, out, 0);
+}
+
+static int sg_copy(struct scatterlist **sg, size_t *offset, void *buf,
+ size_t buflen, size_t total, int out)
+{
+ unsigned int count, off = 0;
+
+ while (buflen && total) {
+ count = min((*sg)->length - *offset, total);
+ count = min(count, buflen);
+
+ if (!count)
+ return off;
+
+ sg_copy_buf(buf + off, *sg, *offset, count, out);
+
+ off += count;
+ buflen -= count;
+ *offset += count;
+ total -= count;
+
+ if (*offset == (*sg)->length) {
+ *sg = sg_next(*sg);
+ if (*sg)
+ *offset = 0;
+ else
+ total = 0;
+ }
+ }
+
+ return off;
+}
+
+static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
+ dma_addr_t dma_addr_out, int length)
+{
+ struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct omap_aes_dev *dd = ctx->dd;
+ int len32;
+
+ pr_debug("len: %d\n", length);
+
+ dd->dma_size = length;
+
+ if (!(dd->flags & FLAGS_FAST))
+ dma_sync_single_for_device(dd->dev, dma_addr_in, length,
+ DMA_TO_DEVICE);
+
+ len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+ /* IN */
+ omap_set_dma_transfer_params(dd->dma_lch_in, OMAP_DMA_DATA_TYPE_S32,
+ len32, 1, OMAP_DMA_SYNC_PACKET, dd->dma_in,
+ OMAP_DMA_DST_SYNC);
+
+ omap_set_dma_src_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_POST_INC,
+ dma_addr_in, 0, 0);
+
+ /* OUT */
+ omap_set_dma_transfer_params(dd->dma_lch_out, OMAP_DMA_DATA_TYPE_S32,
+ len32, 1, OMAP_DMA_SYNC_PACKET,
+ dd->dma_out, OMAP_DMA_SRC_SYNC);
+
+ omap_set_dma_dest_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_POST_INC,
+ dma_addr_out, 0, 0);
+
+ omap_start_dma(dd->dma_lch_in);
+ omap_start_dma(dd->dma_lch_out);
+
+ omap_aes_write_ctrl(dd);
+
+ return 0;
+}
+
+static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
+{
+ struct crypto_tfm *tfm = crypto_ablkcipher_tfm(
+ crypto_ablkcipher_reqtfm(dd->req));
+ int err, fast = 0, in, out;
+ size_t count;
+ dma_addr_t addr_in, addr_out;
+
+ pr_debug("total: %d\n", dd->total);
+
+ if (sg_is_last(dd->in_sg) && sg_is_last(dd->out_sg)) {
+ /* check for alignment */
+ in = IS_ALIGNED((u32)dd->in_sg->offset, sizeof(u32));
+ out = IS_ALIGNED((u32)dd->out_sg->offset, sizeof(u32));
+
+ fast = in && out;
+ }
+
+ if (fast) {
+ count = min(dd->total, sg_dma_len(dd->in_sg));
+ count = min(count, sg_dma_len(dd->out_sg));
+
+ if (count != dd->total)
+ return -EINVAL;
+
+ pr_debug("fast\n");
+
+ err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
+ if (!err) {
+ dev_err(dd->dev, "dma_map_sg() error\n");
+ return -EINVAL;
+ }
+
+ err = dma_map_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
+ if (!err) {
+ dev_err(dd->dev, "dma_map_sg() error\n");
+ dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
+ return -EINVAL;
+ }
+
+ addr_in = sg_dma_address(dd->in_sg);
+ addr_out = sg_dma_address(dd->out_sg);
+
+ dd->flags |= FLAGS_FAST;
+
+ } else {
+ /* use cache buffers */
+ count = sg_copy(&dd->in_sg, &dd->in_offset, dd->buf_in,
+ dd->buflen, dd->total, 0);
+
+ addr_in = dd->dma_addr_in;
+ addr_out = dd->dma_addr_out;
+
+ dd->flags &= ~FLAGS_FAST;
+
+ }
+
+ dd->total -= count;
+
+ err = omap_aes_hw_init(dd);
+
+ err = omap_aes_crypt_dma(tfm, addr_in, addr_out, count);
+
+ return err;
+}
+
+static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
+{
+ struct omap_aes_ctx *ctx;
+
+ pr_debug("err: %d\n", err);
+
+ ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(dd->req));
+
+ if (!dd->total)
+ dd->req->base.complete(&dd->req->base, err);
+}
+
+static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
+{
+ int err = 0;
+ size_t count;
+
+ pr_debug("total: %d\n", dd->total);
+
+ omap_aes_write_mask(dd, AES_REG_MASK, 0, AES_REG_MASK_START);
+
+ omap_aes_hw_cleanup(dd);
+
+ omap_stop_dma(dd->dma_lch_in);
+ omap_stop_dma(dd->dma_lch_out);
+
+ if (dd->flags & FLAGS_FAST) {
+ dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
+ dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
+ } else {
+ dma_sync_single_for_device(dd->dev, dd->dma_addr_out,
+ dd->dma_size, DMA_FROM_DEVICE);
+
+ /* copy data */
+ count = sg_copy(&dd->out_sg, &dd->out_offset, dd->buf_out,
+ dd->buflen, dd->dma_size, 1);
+ if (count != dd->dma_size) {
+ err = -EINVAL;
+ pr_err("not all data converted: %u\n", count);
+ }
+ }
+
+ if (err || !dd->total)
+ omap_aes_finish_req(dd, err);
+
+ return err;
+}
+
+static int omap_aes_handle_req(struct omap_aes_dev *dd)
+{
+ struct crypto_async_request *async_req, *backlog;
+ struct omap_aes_ctx *ctx;
+ struct omap_aes_reqctx *rctx;
+ struct ablkcipher_request *req;
+ unsigned long flags;
+
+ if (dd->total)
+ goto start;
+
+ spin_lock_irqsave(&dd->lock, flags);
+ backlog = crypto_get_backlog(&dd->queue);
+ async_req = crypto_dequeue_request(&dd->queue);
+ if (!async_req)
+ clear_bit(FLAGS_BUSY, &dd->flags);
+ spin_unlock_irqrestore(&dd->lock, flags);
+
+ if (!async_req)
+ return 0;
+
+ if (backlog)
+ backlog->complete(backlog, -EINPROGRESS);
+
+ req = ablkcipher_request_cast(async_req);
+
+ pr_debug("get new req\n");
+
+ /* assign new request to device */
+ dd->req = req;
+ dd->total = req->nbytes;
+ dd->in_offset = 0;
+ dd->in_sg = req->src;
+ dd->out_offset = 0;
+ dd->out_sg = req->dst;
+
+ rctx = ablkcipher_request_ctx(req);
+ ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+ rctx->mode &= FLAGS_MODE_MASK;
+ dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode;
+
+ dd->iv = req->info;
+ if ((dd->flags & FLAGS_CBC) && dd->iv)
+ dd->flags |= FLAGS_NEW_IV;
+ else
+ dd->flags &= ~FLAGS_NEW_IV;
+
+ ctx->dd = dd;
+ if (dd->ctx != ctx) {
+ /* assign new context to device */
+ dd->ctx = ctx;
+ ctx->flags |= FLAGS_NEW_KEY;
+ }
+
+ if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE))
+ pr_err("request size is not exact amount of AES blocks\n");
+
+start:
+ return omap_aes_crypt_dma_start(dd);
+}
+
+static void omap_aes_task(unsigned long data)
+{
+ struct omap_aes_dev *dd = (struct omap_aes_dev *)data;
+ int err;
+
+ pr_debug("enter\n");
+
+ err = omap_aes_crypt_dma_stop(dd);
+
+ err = omap_aes_handle_req(dd);
+
+ pr_debug("exit\n");
+}
+
+static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
+{
+ struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
+ crypto_ablkcipher_reqtfm(req));
+ struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+ struct omap_aes_dev *dd;
+ unsigned long flags;
+ int err;
+
+ pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes,
+ !!(mode & FLAGS_ENCRYPT),
+ !!(mode & FLAGS_CBC));
+
+ dd = omap_aes_find_dev(ctx);
+ if (!dd)
+ return -ENODEV;
+
+ rctx->mode = mode;
+
+ spin_lock_irqsave(&dd->lock, flags);
+ err = ablkcipher_enqueue_request(&dd->queue, req);
+ spin_unlock_irqrestore(&dd->lock, flags);
+
+ if (!test_and_set_bit(FLAGS_BUSY, &dd->flags))
+ omap_aes_handle_req(dd);
+
+ pr_debug("exit\n");
+
+ return err;
+}
+
+/* ********************** ALG API ************************************ */
+
+static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
+ keylen != AES_KEYSIZE_256)
+ return -EINVAL;
+
+ pr_debug("enter, keylen: %d\n", keylen);
+
+ memcpy(ctx->key, key, keylen);
+ ctx->keylen = keylen;
+ ctx->flags |= FLAGS_NEW_KEY;
+
+ return 0;
+}
+
+static int omap_aes_ecb_encrypt(struct ablkcipher_request *req)
+{
+ return omap_aes_crypt(req, FLAGS_ENCRYPT);
+}
+
+static int omap_aes_ecb_decrypt(struct ablkcipher_request *req)
+{
+ return omap_aes_crypt(req, 0);
+}
+
+static int omap_aes_cbc_encrypt(struct ablkcipher_request *req)
+{
+ return omap_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CBC);
+}
+
+static int omap_aes_cbc_decrypt(struct ablkcipher_request *req)
+{
+ return omap_aes_crypt(req, FLAGS_CBC);
+}
+
+static int omap_aes_cra_init(struct crypto_tfm *tfm)
+{
+ pr_debug("enter\n");
+
+ tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx);
+
+ return 0;
+}
+
+static void omap_aes_cra_exit(struct crypto_tfm *tfm)
+{
+ pr_debug("enter\n");
+}
+
+/* ********************** ALGS ************************************ */
+
+static struct crypto_alg algs[] = {
+{
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-omap",
+ .cra_priority = 100,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct omap_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = omap_aes_cra_init,
+ .cra_exit = omap_aes_cra_exit,
+ .cra_u.ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = omap_aes_setkey,
+ .encrypt = omap_aes_ecb_encrypt,
+ .decrypt = omap_aes_ecb_decrypt,
+ }
+},
+{
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-omap",
+ .cra_priority = 100,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct omap_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = omap_aes_cra_init,
+ .cra_exit = omap_aes_cra_exit,
+ .cra_u.ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = omap_aes_setkey,
+ .encrypt = omap_aes_cbc_encrypt,
+ .decrypt = omap_aes_cbc_decrypt,
+ }
+}
+};
+
+static int omap_aes_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct omap_aes_dev *dd;
+ struct resource *res;
+ int err = -ENOMEM, i, j;
+ u32 reg;
+
+ dd = kzalloc(sizeof(struct omap_aes_dev), GFP_KERNEL);
+ if (dd == NULL) {
+ dev_err(dev, "unable to alloc data struct.\n");
+ goto err_data;
+ }
+ dd->dev = dev;
+ platform_set_drvdata(pdev, dd);
+
+ spin_lock_init(&dd->lock);
+ crypto_init_queue(&dd->queue, OMAP_AES_QUEUE_LENGTH);
+
+ /* Get the base address */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(dev, "invalid resource type\n");
+ err = -ENODEV;
+ goto err_res;
+ }
+ dd->phys_base = res->start;
+
+ /* Get the DMA */
+ res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+ if (!res)
+ dev_info(dev, "no DMA info\n");
+ else
+ dd->dma_out = res->start;
+
+ /* Get the DMA */
+ res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+ if (!res)
+ dev_info(dev, "no DMA info\n");
+ else
+ dd->dma_in = res->start;
+
+ /* Initializing the clock */
+ dd->iclk = clk_get(dev, "ick");
+ if (!dd->iclk) {
+ dev_err(dev, "clock intialization failed.\n");
+ err = -ENODEV;
+ goto err_res;
+ }
+
+ dd->io_base = ioremap(dd->phys_base, SZ_4K);
+ if (!dd->io_base) {
+ dev_err(dev, "can't ioremap\n");
+ err = -ENOMEM;
+ goto err_io;
+ }
+
+ clk_enable(dd->iclk);
+ reg = omap_aes_read(dd, AES_REG_REV);
+ dev_info(dev, "OMAP AES hw accel rev: %u.%u\n",
+ (reg & AES_REG_REV_MAJOR) >> 4, reg & AES_REG_REV_MINOR);
+ clk_disable(dd->iclk);
+
+ tasklet_init(&dd->task, omap_aes_task, (unsigned long)dd);
+
+ err = omap_aes_dma_init(dd);
+ if (err)
+ goto err_dma;
+
+ INIT_LIST_HEAD(&dd->list);
+ spin_lock(&list_lock);
+ list_add_tail(&dd->list, &dev_list);
+ spin_unlock(&list_lock);
+
+ for (i = 0; i < ARRAY_SIZE(algs); i++) {
+ pr_debug("i: %d\n", i);
+ INIT_LIST_HEAD(&algs[i].cra_list);
+ err = crypto_register_alg(&algs[i]);
+ if (err)
+ goto err_algs;
+ }
+
+ pr_info("probe() done\n");
+
+ return 0;
+err_algs:
+ for (j = 0; j < i; j++)
+ crypto_unregister_alg(&algs[j]);
+ omap_aes_dma_cleanup(dd);
+err_dma:
+ tasklet_kill(&dd->task);
+ iounmap(dd->io_base);
+err_io:
+ clk_put(dd->iclk);
+err_res:
+ kfree(dd);
+ dd = NULL;
+err_data:
+ dev_err(dev, "initialization failed.\n");
+ return err;
+}
+
+static int omap_aes_remove(struct platform_device *pdev)
+{
+ struct omap_aes_dev *dd = platform_get_drvdata(pdev);
+ int i;
+
+ if (!dd)
+ return -ENODEV;
+
+ spin_lock(&list_lock);
+ list_del(&dd->list);
+ spin_unlock(&list_lock);
+
+ for (i = 0; i < ARRAY_SIZE(algs); i++)
+ crypto_unregister_alg(&algs[i]);
+
+ tasklet_kill(&dd->task);
+ omap_aes_dma_cleanup(dd);
+ iounmap(dd->io_base);
+ clk_put(dd->iclk);
+ kfree(dd);
+ dd = NULL;
+
+ return 0;
+}
+
+static struct platform_driver omap_aes_driver = {
+ .probe = omap_aes_probe,
+ .remove = omap_aes_remove,
+ .driver = {
+ .name = "omap-aes",
+ .owner = THIS_MODULE,
+ },
+};
+
+static int __init omap_aes_mod_init(void)
+{
+ pr_info("loading %s driver\n", "omap-aes");
+
+ if (!cpu_class_is_omap2() || omap_type() != OMAP2_DEVICE_TYPE_SEC) {
+ pr_err("Unsupported cpu\n");
+ return -ENODEV;
+ }
+
+ return platform_driver_register(&omap_aes_driver);
+}
+
+static void __exit omap_aes_mod_exit(void)
+{
+ platform_driver_unregister(&omap_aes_driver);
+}
+
+module_init(omap_aes_mod_init);
+module_exit(omap_aes_mod_exit);
+
+MODULE_DESCRIPTION("OMAP AES hw acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Dmitry Kasatkin");
+
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 7d14856..a081c7c 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -311,7 +311,8 @@
len32 = DIV_ROUND_UP(length, sizeof(u32));
omap_set_dma_transfer_params(dd->dma_lch, OMAP_DMA_DATA_TYPE_S32, len32,
- 1, OMAP_DMA_SYNC_PACKET, dd->dma, OMAP_DMA_DST_SYNC);
+ 1, OMAP_DMA_SYNC_PACKET, dd->dma,
+ OMAP_DMA_DST_SYNC_PREFETCH);
omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC,
dma_addr, 0, 0);
@@ -1072,6 +1073,9 @@
omap_set_dma_dest_burst_mode(dd->dma_lch,
OMAP_DMA_DATA_BURST_16);
+ omap_set_dma_src_burst_mode(dd->dma_lch,
+ OMAP_DMA_DATA_BURST_4);
+
return 0;
}
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 4bcd825..b879c3f 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -161,7 +161,7 @@
static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr)
{
talitos_ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
- talitos_ptr->eptr = cpu_to_be32(upper_32_bits(dma_addr));
+ talitos_ptr->eptr = upper_32_bits(dma_addr);
}
/*
@@ -332,10 +332,9 @@
/* GO! */
wmb();
- out_be32(priv->reg + TALITOS_FF(ch),
- cpu_to_be32(upper_32_bits(request->dma_desc)));
+ out_be32(priv->reg + TALITOS_FF(ch), upper_32_bits(request->dma_desc));
out_be32(priv->reg + TALITOS_FF_LO(ch),
- cpu_to_be32(lower_32_bits(request->dma_desc)));
+ lower_32_bits(request->dma_desc));
spin_unlock_irqrestore(&priv->chan[ch].head_lock, flags);
@@ -1751,14 +1750,14 @@
ahash_init(areq);
req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/
- req_ctx->hw_context[0] = cpu_to_be32(SHA224_H0);
- req_ctx->hw_context[1] = cpu_to_be32(SHA224_H1);
- req_ctx->hw_context[2] = cpu_to_be32(SHA224_H2);
- req_ctx->hw_context[3] = cpu_to_be32(SHA224_H3);
- req_ctx->hw_context[4] = cpu_to_be32(SHA224_H4);
- req_ctx->hw_context[5] = cpu_to_be32(SHA224_H5);
- req_ctx->hw_context[6] = cpu_to_be32(SHA224_H6);
- req_ctx->hw_context[7] = cpu_to_be32(SHA224_H7);
+ req_ctx->hw_context[0] = SHA224_H0;
+ req_ctx->hw_context[1] = SHA224_H1;
+ req_ctx->hw_context[2] = SHA224_H2;
+ req_ctx->hw_context[3] = SHA224_H3;
+ req_ctx->hw_context[4] = SHA224_H4;
+ req_ctx->hw_context[5] = SHA224_H5;
+ req_ctx->hw_context[6] = SHA224_H6;
+ req_ctx->hw_context[7] = SHA224_H7;
/* init 64-bit count */
req_ctx->hw_context[8] = 0;
@@ -2333,8 +2332,7 @@
talitos_unregister_rng(dev);
for (i = 0; i < priv->num_channels; i++)
- if (priv->chan[i].fifo)
- kfree(priv->chan[i].fifo);
+ kfree(priv->chan[i].fifo);
kfree(priv->chan);
@@ -2389,6 +2387,9 @@
DESC_HDR_MODE0_MDEU_SHA256;
}
break;
+ default:
+ dev_err(dev, "unknown algorithm type %d\n", t_alg->algt.type);
+ return ERR_PTR(-EINVAL);
}
alg->cra_module = THIS_MODULE;
diff --git a/drivers/gpu/drm/nouveau/nouveau_i2c.h b/drivers/gpu/drm/nouveau/nouveau_i2c.h
index f71cb32..cfe7c84 100644
--- a/drivers/gpu/drm/nouveau/nouveau_i2c.h
+++ b/drivers/gpu/drm/nouveau/nouveau_i2c.h
@@ -24,7 +24,6 @@
#define __NOUVEAU_I2C_H__
#include <linux/i2c.h>
-#include <linux/i2c-id.h>
#include <linux/i2c-algo-bit.h>
#include "drm_dp_helper.h"
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 17a6602..454c1dc 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -36,7 +36,6 @@
#include <drm_dp_helper.h>
#include <drm_fixed.h>
#include <linux/i2c.h>
-#include <linux/i2c-id.h>
#include <linux/i2c-algo-bit.h>
struct radeon_bo;
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 6369ba7..3052e29 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -56,20 +56,20 @@
depends on HID
config HID_3M_PCT
- tristate "3M PCT"
+ tristate "3M PCT touchscreen"
depends on USB_HID
---help---
Support for 3M PCT touch screens.
config HID_A4TECH
- tristate "A4 tech" if EMBEDDED
+ tristate "A4 tech mice" if EMBEDDED
depends on USB_HID
default !EMBEDDED
---help---
Support for A4 tech X5 and WOP-35 / Trust 450L mice.
config HID_ACRUX_FF
- tristate "ACRUX force feedback support"
+ tristate "ACRUX force feedback"
depends on USB_HID
select INPUT_FF_MEMLESS
---help---
@@ -77,7 +77,7 @@
game controllers.
config HID_APPLE
- tristate "Apple" if EMBEDDED
+ tristate "Apple {i,Power,Mac}Books" if EMBEDDED
depends on (USB_HID || BT_HIDP)
default !EMBEDDED
---help---
@@ -88,7 +88,7 @@
MacBooks, MacBook Pros and Apple Aluminum.
config HID_BELKIN
- tristate "Belkin" if EMBEDDED
+ tristate "Belkin Flip KVM and Wireless keyboard" if EMBEDDED
depends on USB_HID
default !EMBEDDED
---help---
@@ -101,14 +101,14 @@
Support for Cando dual touch panel.
config HID_CHERRY
- tristate "Cherry" if EMBEDDED
+ tristate "Cherry Cymotion keyboard" if EMBEDDED
depends on USB_HID
default !EMBEDDED
---help---
Support for Cherry Cymotion keyboard.
config HID_CHICONY
- tristate "Chicony" if EMBEDDED
+ tristate "Chicony Tactical pad" if EMBEDDED
depends on USB_HID
default !EMBEDDED
---help---
@@ -130,20 +130,20 @@
and some additional multimedia keys.
config HID_CYPRESS
- tristate "Cypress" if EMBEDDED
+ tristate "Cypress mouse and barcode readers" if EMBEDDED
depends on USB_HID
default !EMBEDDED
---help---
Support for cypress mouse and barcode readers.
config HID_DRAGONRISE
- tristate "DragonRise Inc. support"
+ tristate "DragonRise Inc. game controller"
depends on USB_HID
---help---
Say Y here if you have DragonRise Inc.game controllers.
config DRAGONRISE_FF
- bool "DragonRise Inc. force feedback support"
+ bool "DragonRise Inc. force feedback"
depends on HID_DRAGONRISE
select INPUT_FF_MEMLESS
---help---
@@ -157,46 +157,58 @@
Support for the eGalax dual-touch panel.
config HID_ELECOM
- tristate "ELECOM"
+ tristate "ELECOM BM084 bluetooth mouse"
depends on BT_HIDP
---help---
Support for the ELECOM BM084 (bluetooth mouse).
config HID_EZKEY
- tristate "Ezkey" if EMBEDDED
+ tristate "Ezkey BTC 8193 keyboard" if EMBEDDED
depends on USB_HID
default !EMBEDDED
---help---
Support for Ezkey BTC 8193 keyboard.
config HID_KYE
- tristate "Kye" if EMBEDDED
+ tristate "Kye/Genius Ergo Mouse" if EMBEDDED
depends on USB_HID
default !EMBEDDED
---help---
Support for Kye/Genius Ergo Mouse.
+config HID_UCLOGIC
+ tristate "UC-Logic"
+ depends on USB_HID
+ ---help---
+ Support for UC-Logic tablets.
+
+config HID_WALTOP
+ tristate "Waltop"
+ depends on USB_HID
+ ---help---
+ Support for Waltop tablets.
+
config HID_GYRATION
- tristate "Gyration"
+ tristate "Gyration remote control"
depends on USB_HID
---help---
Support for Gyration remote control.
config HID_TWINHAN
- tristate "Twinhan"
+ tristate "Twinhan IR remote control"
depends on USB_HID
---help---
Support for Twinhan IR remote control.
config HID_KENSINGTON
- tristate "Kensington" if EMBEDDED
+ tristate "Kensington Slimblade Trackball" if EMBEDDED
depends on USB_HID
default !EMBEDDED
---help---
Support for Kensington Slimblade Trackball.
config HID_LOGITECH
- tristate "Logitech" if EMBEDDED
+ tristate "Logitech devices" if EMBEDDED
depends on USB_HID
default !EMBEDDED
---help---
@@ -220,12 +232,12 @@
force feedback.
config LOGIRUMBLEPAD2_FF
- bool "Logitech Rumblepad 2 force feedback support"
+ bool "Logitech RumblePad/Rumblepad 2 force feedback support"
depends on HID_LOGITECH
select INPUT_FF_MEMLESS
help
Say Y here if you want to enable force feedback support for Logitech
- Rumblepad 2 devices.
+ RumblePad and Rumblepad 2 devices.
config LOGIG940_FF
bool "Logitech Flight System G940 force feedback support"
@@ -235,6 +247,14 @@
Say Y here if you want to enable force feedback support for Logitech
Flight System G940 devices.
+config LOGIWII_FF
+ bool "Logitech Speed Force Wireless force feedback support"
+ depends on HID_LOGITECH
+ select INPUT_FF_MEMLESS
+ help
+ Say Y here if you want to enable force feedback support for Logitech
+ Speed Force Wireless (Wii) devices.
+
config HID_MAGICMOUSE
tristate "Apple MagicMouse multi-touch support"
depends on BT_HIDP
@@ -245,39 +265,39 @@
Apple Wireless "Magic" Mouse.
config HID_MICROSOFT
- tristate "Microsoft" if EMBEDDED
+ tristate "Microsoft non-fully HID-compliant devices" if EMBEDDED
depends on USB_HID
default !EMBEDDED
---help---
Support for Microsoft devices that are not fully compliant with HID standard.
config HID_MOSART
- tristate "MosArt"
+ tristate "MosArt dual-touch panels"
depends on USB_HID
---help---
Support for MosArt dual-touch panels.
config HID_MONTEREY
- tristate "Monterey" if EMBEDDED
+ tristate "Monterey Genius KB29E keyboard" if EMBEDDED
depends on USB_HID
default !EMBEDDED
---help---
Support for Monterey Genius KB29E.
config HID_NTRIG
- tristate "NTrig"
+ tristate "N-Trig touch screen"
depends on USB_HID
---help---
Support for N-Trig touch screen.
config HID_ORTEK
- tristate "Ortek"
+ tristate "Ortek WKB-2000 wireless keyboard and mouse trackpad"
depends on USB_HID
---help---
Support for Ortek WKB-2000 wireless keyboard + mouse trackpad.
config HID_PANTHERLORD
- tristate "Pantherlord support"
+ tristate "Pantherlord/GreenAsia game controller"
depends on USB_HID
---help---
Say Y here if you have a PantherLord/GreenAsia based game controller
@@ -292,7 +312,7 @@
or adapter and want to enable force feedback support for it.
config HID_PETALYNX
- tristate "Petalynx"
+ tristate "Petalynx Maxter remote control"
depends on USB_HID
---help---
Support for Petalynx Maxter remote control.
@@ -356,7 +376,7 @@
Provide access to PicoLCD's GPO pins via leds class.
config HID_QUANTA
- tristate "Quanta Optical Touch"
+ tristate "Quanta Optical Touch panels"
depends on USB_HID
---help---
Support for Quanta Optical Touch dual-touch panels.
@@ -376,32 +396,39 @@
---help---
Support for Roccat Kone mouse.
+config HID_ROCCAT_PYRA
+ tristate "Roccat Pyra mouse support"
+ depends on USB_HID
+ select HID_ROCCAT
+ ---help---
+ Support for Roccat Pyra mouse.
+
config HID_SAMSUNG
- tristate "Samsung"
+ tristate "Samsung InfraRed remote control or keyboards"
depends on USB_HID
---help---
Support for Samsung InfraRed remote control or keyboards.
config HID_SONY
- tristate "Sony"
+ tristate "Sony PS3 controller"
depends on USB_HID
---help---
Support for Sony PS3 controller.
config HID_STANTUM
- tristate "Stantum"
+ tristate "Stantum multitouch panel"
depends on USB_HID
---help---
Support for Stantum multitouch panel.
config HID_SUNPLUS
- tristate "Sunplus"
+ tristate "Sunplus wireless desktop"
depends on USB_HID
---help---
Support for Sunplus wireless desktop.
config HID_GREENASIA
- tristate "GreenAsia (Product ID 0x12) support"
+ tristate "GreenAsia (Product ID 0x12) game controller support"
depends on USB_HID
---help---
Say Y here if you have a GreenAsia (Product ID 0x12) based game
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index 46f037f..c335605 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -21,6 +21,9 @@
ifdef CONFIG_LOGIG940_FF
hid-logitech-objs += hid-lg3ff.o
endif
+ifdef CONFIG_LOGIWII_FF
+ hid-logitech-objs += hid-lg4ff.o
+endif
obj-$(CONFIG_HID_3M_PCT) += hid-3m-pct.o
obj-$(CONFIG_HID_A4TECH) += hid-a4tech.o
@@ -52,6 +55,7 @@
obj-$(CONFIG_HID_PICOLCD) += hid-picolcd.o
obj-$(CONFIG_HID_ROCCAT) += hid-roccat.o
obj-$(CONFIG_HID_ROCCAT_KONE) += hid-roccat-kone.o
+obj-$(CONFIG_HID_ROCCAT_PYRA) += hid-roccat-pyra.o
obj-$(CONFIG_HID_SAMSUNG) += hid-samsung.o
obj-$(CONFIG_HID_SMARTJOYPLUS) += hid-sjoy.o
obj-$(CONFIG_HID_SONY) += hid-sony.o
@@ -61,9 +65,11 @@
obj-$(CONFIG_HID_THRUSTMASTER) += hid-tmff.o
obj-$(CONFIG_HID_TOPSEED) += hid-topseed.o
obj-$(CONFIG_HID_TWINHAN) += hid-twinhan.o
+obj-$(CONFIG_HID_UCLOGIC) += hid-uclogic.o
obj-$(CONFIG_HID_ZEROPLUS) += hid-zpff.o
obj-$(CONFIG_HID_ZYDACRON) += hid-zydacron.o
obj-$(CONFIG_HID_WACOM) += hid-wacom.o
+obj-$(CONFIG_HID_WALTOP) += hid-waltop.o
obj-$(CONFIG_USB_HID) += usbhid/
obj-$(CONFIG_USB_MOUSE) += usbhid/
diff --git a/drivers/hid/hid-3m-pct.c b/drivers/hid/hid-3m-pct.c
index 2a0d56b..02d8cd3 100644
--- a/drivers/hid/hid-3m-pct.c
+++ b/drivers/hid/hid-3m-pct.c
@@ -2,6 +2,8 @@
* HID driver for 3M PCT multitouch panels
*
* Copyright (c) 2009-2010 Stephane Chatty <chatty@enac.fr>
+ * Copyright (c) 2010 Henrik Rydberg <rydberg@euromail.se>
+ * Copyright (c) 2010 Canonical, Ltd.
*
*/
@@ -24,15 +26,26 @@
#include "hid-ids.h"
+#define MAX_SLOTS 60
+#define MAX_TRKID USHRT_MAX
+#define MAX_EVENTS 360
+
+/* estimated signal-to-noise ratios */
+#define SN_MOVE 2048
+#define SN_WIDTH 128
+
struct mmm_finger {
__s32 x, y, w, h;
- __u8 rank;
+ __u16 id;
+ bool prev_touch;
bool touch, valid;
};
struct mmm_data {
- struct mmm_finger f[10];
- __u8 curid, num;
+ struct mmm_finger f[MAX_SLOTS];
+ __u16 id;
+ __u8 curid;
+ __u8 nexp, nreal;
bool touch, valid;
};
@@ -40,6 +53,10 @@
struct hid_field *field, struct hid_usage *usage,
unsigned long **bit, int *max)
{
+ int f1 = field->logical_minimum;
+ int f2 = field->logical_maximum;
+ int df = f2 - f1;
+
switch (usage->hid & HID_USAGE_PAGE) {
case HID_UP_BUTTON:
@@ -50,18 +67,20 @@
case HID_GD_X:
hid_map_usage(hi, usage, bit, max,
EV_ABS, ABS_MT_POSITION_X);
+ input_set_abs_params(hi->input, ABS_MT_POSITION_X,
+ f1, f2, df / SN_MOVE, 0);
/* touchscreen emulation */
input_set_abs_params(hi->input, ABS_X,
- field->logical_minimum,
- field->logical_maximum, 0, 0);
+ f1, f2, df / SN_MOVE, 0);
return 1;
case HID_GD_Y:
hid_map_usage(hi, usage, bit, max,
EV_ABS, ABS_MT_POSITION_Y);
+ input_set_abs_params(hi->input, ABS_MT_POSITION_Y,
+ f1, f2, df / SN_MOVE, 0);
/* touchscreen emulation */
input_set_abs_params(hi->input, ABS_Y,
- field->logical_minimum,
- field->logical_maximum, 0, 0);
+ f1, f2, df / SN_MOVE, 0);
return 1;
}
return 0;
@@ -81,21 +100,31 @@
case HID_DG_TIPSWITCH:
/* touchscreen emulation */
hid_map_usage(hi, usage, bit, max, EV_KEY, BTN_TOUCH);
+ input_set_capability(hi->input, EV_KEY, BTN_TOUCH);
return 1;
case HID_DG_WIDTH:
hid_map_usage(hi, usage, bit, max,
EV_ABS, ABS_MT_TOUCH_MAJOR);
+ input_set_abs_params(hi->input, ABS_MT_TOUCH_MAJOR,
+ f1, f2, df / SN_WIDTH, 0);
return 1;
case HID_DG_HEIGHT:
hid_map_usage(hi, usage, bit, max,
EV_ABS, ABS_MT_TOUCH_MINOR);
+ input_set_abs_params(hi->input, ABS_MT_TOUCH_MINOR,
+ f1, f2, df / SN_WIDTH, 0);
input_set_abs_params(hi->input, ABS_MT_ORIENTATION,
- 1, 1, 0, 0);
+ 0, 1, 0, 0);
return 1;
case HID_DG_CONTACTID:
- field->logical_maximum = 59;
+ field->logical_maximum = MAX_TRKID;
hid_map_usage(hi, usage, bit, max,
EV_ABS, ABS_MT_TRACKING_ID);
+ input_set_abs_params(hi->input, ABS_MT_TRACKING_ID,
+ 0, MAX_TRKID, 0, 0);
+ if (!hi->input->mt)
+ input_mt_create_slots(hi->input, MAX_SLOTS);
+ input_set_events_per_packet(hi->input, MAX_EVENTS);
return 1;
}
/* let hid-input decide for the others */
@@ -113,10 +142,10 @@
struct hid_field *field, struct hid_usage *usage,
unsigned long **bit, int *max)
{
+ /* tell hid-input to skip setup of these event types */
if (usage->type == EV_KEY || usage->type == EV_ABS)
- clear_bit(usage->code, *bit);
-
- return 0;
+ set_bit(usage->type, hi->input->evbit);
+ return -1;
}
/*
@@ -126,70 +155,49 @@
static void mmm_filter_event(struct mmm_data *md, struct input_dev *input)
{
struct mmm_finger *oldest = 0;
- bool pressed = false, released = false;
int i;
-
- /*
- * we need to iterate on all fingers to decide if we have a press
- * or a release event in our touchscreen emulation.
- */
- for (i = 0; i < 10; ++i) {
+ for (i = 0; i < MAX_SLOTS; ++i) {
struct mmm_finger *f = &md->f[i];
if (!f->valid) {
/* this finger is just placeholder data, ignore */
- } else if (f->touch) {
+ continue;
+ }
+ input_mt_slot(input, i);
+ if (f->touch) {
/* this finger is on the screen */
int wide = (f->w > f->h);
- input_event(input, EV_ABS, ABS_MT_TRACKING_ID, i);
+ /* divided by two to match visual scale of touch */
+ int major = max(f->w, f->h) >> 1;
+ int minor = min(f->w, f->h) >> 1;
+
+ if (!f->prev_touch)
+ f->id = md->id++;
+ input_event(input, EV_ABS, ABS_MT_TRACKING_ID, f->id);
input_event(input, EV_ABS, ABS_MT_POSITION_X, f->x);
input_event(input, EV_ABS, ABS_MT_POSITION_Y, f->y);
input_event(input, EV_ABS, ABS_MT_ORIENTATION, wide);
- input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR,
- wide ? f->w : f->h);
- input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR,
- wide ? f->h : f->w);
- input_mt_sync(input);
- /*
- * touchscreen emulation: maintain the age rank
- * of this finger, decide if we have a press
- */
- if (f->rank == 0) {
- f->rank = ++(md->num);
- if (f->rank == 1)
- pressed = true;
- }
- if (f->rank == 1)
+ input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major);
+ input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor);
+ /* touchscreen emulation: pick the oldest contact */
+ if (!oldest || ((f->id - oldest->id) & (SHRT_MAX + 1)))
oldest = f;
} else {
/* this finger took off the screen */
- /* touchscreen emulation: maintain age rank of others */
- int j;
-
- for (j = 0; j < 10; ++j) {
- struct mmm_finger *g = &md->f[j];
- if (g->rank > f->rank) {
- g->rank--;
- if (g->rank == 1)
- oldest = g;
- }
- }
- f->rank = 0;
- --(md->num);
- if (md->num == 0)
- released = true;
+ input_event(input, EV_ABS, ABS_MT_TRACKING_ID, -1);
}
+ f->prev_touch = f->touch;
f->valid = 0;
}
/* touchscreen emulation */
if (oldest) {
- if (pressed)
- input_event(input, EV_KEY, BTN_TOUCH, 1);
+ input_event(input, EV_KEY, BTN_TOUCH, 1);
input_event(input, EV_ABS, ABS_X, oldest->x);
input_event(input, EV_ABS, ABS_Y, oldest->y);
- } else if (released) {
+ } else {
input_event(input, EV_KEY, BTN_TOUCH, 0);
}
+ input_sync(input);
}
/*
@@ -223,10 +231,12 @@
md->f[md->curid].h = value;
break;
case HID_DG_CONTACTID:
+ value = clamp_val(value, 0, MAX_SLOTS - 1);
if (md->valid) {
md->curid = value;
md->f[value].touch = md->touch;
md->f[value].valid = 1;
+ md->nreal++;
}
break;
case HID_GD_X:
@@ -238,7 +248,12 @@
md->f[md->curid].y = value;
break;
case HID_DG_CONTACTCOUNT:
- mmm_filter_event(md, input);
+ if (value)
+ md->nexp = value;
+ if (md->nreal >= md->nexp) {
+ mmm_filter_event(md, input);
+ md->nreal = 0;
+ }
break;
}
}
@@ -255,6 +270,8 @@
int ret;
struct mmm_data *md;
+ hdev->quirks |= HID_QUIRK_NO_INPUT_SYNC;
+
md = kzalloc(sizeof(struct mmm_data), GFP_KERNEL);
if (!md) {
dev_err(&hdev->dev, "cannot allocate 3M data\n");
diff --git a/drivers/hid/hid-a4tech.c b/drivers/hid/hid-a4tech.c
index 3a2b223..1666c16 100644
--- a/drivers/hid/hid-a4tech.c
+++ b/drivers/hid/hid-a4tech.c
@@ -133,6 +133,8 @@
.driver_data = A4_2WHEEL_MOUSE_HACK_7 },
{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_X5_005D),
.driver_data = A4_2WHEEL_MOUSE_HACK_B8 },
+ { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_RP_649),
+ .driver_data = A4_2WHEEL_MOUSE_HACK_B8 },
{ }
};
MODULE_DEVICE_TABLE(hid, a4_devices);
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index bba05d0..eaeca56 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -246,17 +246,18 @@
/*
* MacBook JIS keyboard has wrong logical maximum
*/
-static void apple_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
struct apple_sc *asc = hid_get_drvdata(hdev);
- if ((asc->quirks & APPLE_RDESC_JIS) && rsize >= 60 &&
+ if ((asc->quirks & APPLE_RDESC_JIS) && *rsize >= 60 &&
rdesc[53] == 0x65 && rdesc[59] == 0x65) {
dev_info(&hdev->dev, "fixing up MacBook JIS keyboard report "
"descriptor\n");
rdesc[53] = rdesc[59] = 0xe7;
}
+ return rdesc;
}
static void apple_setup_input(struct input_dev *input)
diff --git a/drivers/hid/hid-cherry.c b/drivers/hid/hid-cherry.c
index 24663a8..e880086 100644
--- a/drivers/hid/hid-cherry.c
+++ b/drivers/hid/hid-cherry.c
@@ -26,15 +26,16 @@
* Cherry Cymotion keyboard have an invalid HID report descriptor,
* that needs fixing before we can parse it.
*/
-static void ch_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *ch_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
- if (rsize >= 17 && rdesc[11] == 0x3c && rdesc[12] == 0x02) {
+ if (*rsize >= 17 && rdesc[11] == 0x3c && rdesc[12] == 0x02) {
dev_info(&hdev->dev, "fixing up Cherry Cymotion report "
"descriptor\n");
rdesc[11] = rdesc[16] = 0xff;
rdesc[12] = rdesc[17] = 0x03;
}
+ return rdesc;
}
#define ch_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 3cb6632..7832b6e 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -388,12 +388,6 @@
__u32 data;
unsigned n;
- /* Local delimiter could have value 0, which allows size to be 0 */
- if (item->size == 0 && item->tag != HID_LOCAL_ITEM_TAG_DELIMITER) {
- dbg_hid("item data expected for local item\n");
- return -1;
- }
-
data = item_udata(item);
switch (item->tag) {
@@ -651,7 +645,7 @@
};
if (device->driver->report_fixup)
- device->driver->report_fixup(device, start, size);
+ start = device->driver->report_fixup(device, start, &size);
device->rdesc = kmemdup(start, size, GFP_KERNEL);
if (device->rdesc == NULL)
@@ -1241,6 +1235,7 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_3M, USB_DEVICE_ID_3M2256) },
{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) },
{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_X5_005D) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_RP_649) },
#if defined(CONFIG_HID_ACRUX_FF) || defined(CONFIG_HID_ACRUX_FF_MODULE)
{ HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0x0802) },
#endif
@@ -1248,6 +1243,7 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI) },
@@ -1327,6 +1323,7 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_CORDLESS_DESKTOP_LX500) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_EXTREME_3D) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WHEEL) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD_CORD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2_2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_F3D) },
@@ -1336,6 +1333,7 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOMO_WHEEL) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOMO_WHEEL2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G25_WHEEL) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WII_WHEEL) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACETRAVELLER) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR) },
@@ -1371,12 +1369,15 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH) },
{ HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_PIXART_IMAGING_INC_OPTICAL_TOUCH_SCREEN) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_PYRA_WIRED) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_STANTUM, USB_DEVICE_ID_MTP) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_STANTUM_STM, USB_DEVICE_ID_MTP_STM) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_STANTUM_SITRONIX, USB_DEVICE_ID_MTP_SITRONIX) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) },
{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) },
{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) },
@@ -1388,8 +1389,16 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, USB_DEVICE_ID_TOPSEED2_RF_COMBO) },
{ HID_USB_DEVICE(USB_VENDOR_ID_TWINHAN, USB_DEVICE_ID_TWINHAN_IR_REMOTE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U) },
{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_WACOM, USB_DEVICE_ID_WACOM_GRAPHIRE_BLUETOOTH) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) },
diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c
index 998b6f4..4cd0e23 100644
--- a/drivers/hid/hid-cypress.c
+++ b/drivers/hid/hid-cypress.c
@@ -31,16 +31,16 @@
* Some USB barcode readers from cypress have usage min and usage max in
* the wrong order
*/
-static void cp_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
unsigned long quirks = (unsigned long)hid_get_drvdata(hdev);
unsigned int i;
if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX))
- return;
+ return rdesc;
- for (i = 0; i < rsize - 4; i++)
+ for (i = 0; i < *rsize - 4; i++)
if (rdesc[i] == 0x29 && rdesc[i + 2] == 0x19) {
__u8 tmp;
@@ -50,6 +50,7 @@
rdesc[i + 3] = rdesc[i + 1];
rdesc[i + 1] = tmp;
}
+ return rdesc;
}
static int cp_input_mapped(struct hid_device *hdev, struct hid_input *hi,
diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 61a3e57..75c5e23 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -570,6 +570,8 @@
buf[i];
list->tail = (list->tail + i) % HID_DEBUG_BUFSIZE;
}
+
+ wake_up_interruptible(&hdev->debug_wait);
}
EXPORT_SYMBOL_GPL(hid_debug_event);
diff --git a/drivers/hid/hid-egalax.c b/drivers/hid/hid-egalax.c
index 8ca7f65..54b017a 100644
--- a/drivers/hid/hid-egalax.c
+++ b/drivers/hid/hid-egalax.c
@@ -31,7 +31,7 @@
bool first; /* is this the first finger in the frame? */
bool valid; /* valid finger data, or just placeholder? */
bool activity; /* at least one active finger previously? */
- __u16 lastx, lasty; /* latest valid (x, y) in the frame */
+ __u16 lastx, lasty, lastz; /* latest valid (x, y, z) in the frame */
};
static int egalax_input_mapping(struct hid_device *hdev, struct hid_input *hi,
@@ -79,6 +79,10 @@
case HID_DG_TIPPRESSURE:
hid_map_usage(hi, usage, bit, max,
EV_ABS, ABS_MT_PRESSURE);
+ /* touchscreen emulation */
+ input_set_abs_params(hi->input, ABS_PRESSURE,
+ field->logical_minimum,
+ field->logical_maximum, 0, 0);
return 1;
}
return 0;
@@ -109,8 +113,8 @@
if (td->valid) {
/* emit multitouch events */
input_event(input, EV_ABS, ABS_MT_TRACKING_ID, td->id);
- input_event(input, EV_ABS, ABS_MT_POSITION_X, td->x);
- input_event(input, EV_ABS, ABS_MT_POSITION_Y, td->y);
+ input_event(input, EV_ABS, ABS_MT_POSITION_X, td->x >> 3);
+ input_event(input, EV_ABS, ABS_MT_POSITION_Y, td->y >> 3);
input_event(input, EV_ABS, ABS_MT_PRESSURE, td->z);
input_mt_sync(input);
@@ -121,6 +125,7 @@
*/
td->lastx = td->x;
td->lasty = td->y;
+ td->lastz = td->z;
}
/*
@@ -129,8 +134,9 @@
* the oldest on the panel, the one we want for single touch
*/
if (!td->first && td->activity) {
- input_event(input, EV_ABS, ABS_X, td->lastx);
- input_event(input, EV_ABS, ABS_Y, td->lasty);
+ input_event(input, EV_ABS, ABS_X, td->lastx >> 3);
+ input_event(input, EV_ABS, ABS_Y, td->lasty >> 3);
+ input_event(input, EV_ABS, ABS_PRESSURE, td->lastz);
}
if (!td->valid) {
diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c
index 7a40878..6e31f30 100644
--- a/drivers/hid/hid-elecom.c
+++ b/drivers/hid/hid-elecom.c
@@ -20,14 +20,15 @@
#include "hid-ids.h"
-static void elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
- if (rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) {
+ if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) {
dev_info(&hdev->dev, "Fixing up Elecom BM084 "
"report descriptor.\n");
rdesc[47] = 0x00;
}
+ return rdesc;
}
static const struct hid_device_id elecom_devices[] = {
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 855aa8e..3ee999d 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -25,6 +25,7 @@
#define USB_VENDOR_ID_A4TECH 0x09da
#define USB_DEVICE_ID_A4TECH_WCP32PU 0x0006
#define USB_DEVICE_ID_A4TECH_X5_005D 0x000a
+#define USB_DEVICE_ID_A4TECH_RP_649 0x001a
#define USB_VENDOR_ID_AASHIMA 0x06d6
#define USB_DEVICE_ID_AASHIMA_GAMEPAD 0x0025
@@ -63,6 +64,7 @@
#define USB_VENDOR_ID_APPLE 0x05ac
#define USB_DEVICE_ID_APPLE_MIGHTYMOUSE 0x0304
#define USB_DEVICE_ID_APPLE_MAGICMOUSE 0x030d
+#define USB_DEVICE_ID_APPLE_MAGICTRACKPAD 0x030e
#define USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI 0x020e
#define USB_DEVICE_ID_APPLE_FOUNTAIN_ISO 0x020f
#define USB_DEVICE_ID_APPLE_GEYSER_ANSI 0x0214
@@ -142,6 +144,7 @@
#define USB_DEVICE_ID_CH_FLIGHT_SIM_ECLIPSE_YOKE 0x0051
#define USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE 0x00ff
#define USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK 0x00d3
+#define USB_DEVICE_ID_CH_AXIS_295 0x001c
#define USB_VENDOR_ID_CHERRY 0x046a
#define USB_DEVICE_ID_CHERRY_CYMOTION 0x0023
@@ -343,6 +346,7 @@
#define USB_DEVICE_ID_LOGITECH_RECEIVER 0xc101
#define USB_DEVICE_ID_LOGITECH_HARMONY_FIRST 0xc110
#define USB_DEVICE_ID_LOGITECH_HARMONY_LAST 0xc14f
+#define USB_DEVICE_ID_LOGITECH_RUMBLEPAD_CORD 0xc20a
#define USB_DEVICE_ID_LOGITECH_RUMBLEPAD 0xc211
#define USB_DEVICE_ID_LOGITECH_EXTREME_3D 0xc215
#define USB_DEVICE_ID_LOGITECH_RUMBLEPAD2 0xc218
@@ -354,6 +358,7 @@
#define USB_DEVICE_ID_LOGITECH_WINGMAN_FFG 0xc293
#define USB_DEVICE_ID_LOGITECH_MOMO_WHEEL 0xc295
#define USB_DEVICE_ID_LOGITECH_G25_WHEEL 0xc299
+#define USB_DEVICE_ID_LOGITECH_WII_WHEEL 0xc29c
#define USB_DEVICE_ID_LOGITECH_ELITE_KBD 0xc30a
#define USB_DEVICE_ID_S510_RECEIVER 0xc50c
#define USB_DEVICE_ID_S510_RECEIVER_2 0xc517
@@ -466,6 +471,8 @@
#define USB_VENDOR_ID_ROCCAT 0x1e7d
#define USB_DEVICE_ID_ROCCAT_KONE 0x2ced
+#define USB_DEVICE_ID_ROCCAT_PYRA_WIRED 0x2c24
+#define USB_DEVICE_ID_ROCCAT_PYRA_WIRELESS 0x2cf6
#define USB_VENDOR_ID_SAITEK 0x06a3
#define USB_DEVICE_ID_SAITEK_RUMBLEPAD 0xff17
@@ -485,6 +492,12 @@
#define USB_VENDOR_ID_STANTUM 0x1f87
#define USB_DEVICE_ID_MTP 0x0002
+#define USB_VENDOR_ID_STANTUM_STM 0x0483
+#define USB_DEVICE_ID_MTP_STM 0x3261
+
+#define USB_VENDOR_ID_STANTUM_SITRONIX 0x1403
+#define USB_DEVICE_ID_MTP_SITRONIX 0x5001
+
#define USB_VENDOR_ID_SUN 0x0430
#define USB_DEVICE_ID_RARITAN_KVM_DONGLE 0xcdab
@@ -514,8 +527,10 @@
#define USB_VENDOR_ID_UCLOGIC 0x5543
#define USB_DEVICE_ID_UCLOGIC_TABLET_PF1209 0x0042
-#define USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U 0x0003
#define USB_DEVICE_ID_UCLOGIC_TABLET_KNA5 0x6001
+#define USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U 0x0003
+#define USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U 0x0004
+#define USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U 0x0005
#define USB_VENDOR_ID_VERNIER 0x08f7
#define USB_DEVICE_ID_VERNIER_LABPRO 0x0001
@@ -527,6 +542,12 @@
#define USB_VENDOR_ID_WACOM 0x056a
#define USB_DEVICE_ID_WACOM_GRAPHIRE_BLUETOOTH 0x81
+#define USB_VENDOR_ID_WALTOP 0x172f
+#define USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH 0x0032
+#define USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH 0x0034
+#define USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH 0x0501
+#define USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH 0x0500
+
#define USB_VENDOR_ID_WISEGROUP 0x0925
#define USB_DEVICE_ID_SMARTJOY_PLUS 0x0005
#define USB_DEVICE_ID_1_PHIDGETSERVO_20 0x8101
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 6c03dcc..834ef47 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -149,6 +149,83 @@
}
+/**
+ * hidinput_calc_abs_res - calculate an absolute axis resolution
+ * @field: the HID report field to calculate resolution for
+ * @code: axis code
+ *
+ * The formula is:
+ * (logical_maximum - logical_minimum)
+ * resolution = ----------------------------------------------------------
+ * (physical_maximum - physical_minimum) * 10 ^ unit_exponent
+ *
+ * as seen in the HID specification v1.11 6.2.2.7 Global Items.
+ *
+ * Only exponent 1 length units are processed. Centimeters are converted to
+ * inches. Degrees are converted to radians.
+ */
+static __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code)
+{
+ __s32 unit_exponent = field->unit_exponent;
+ __s32 logical_extents = field->logical_maximum -
+ field->logical_minimum;
+ __s32 physical_extents = field->physical_maximum -
+ field->physical_minimum;
+ __s32 prev;
+
+ /* Check if the extents are sane */
+ if (logical_extents <= 0 || physical_extents <= 0)
+ return 0;
+
+ /*
+ * Verify and convert units.
+ * See HID specification v1.11 6.2.2.7 Global Items for unit decoding
+ */
+ if (code == ABS_X || code == ABS_Y || code == ABS_Z) {
+ if (field->unit == 0x11) { /* If centimeters */
+ /* Convert to inches */
+ prev = logical_extents;
+ logical_extents *= 254;
+ if (logical_extents < prev)
+ return 0;
+ unit_exponent += 2;
+ } else if (field->unit != 0x13) { /* If not inches */
+ return 0;
+ }
+ } else if (code == ABS_RX || code == ABS_RY || code == ABS_RZ) {
+ if (field->unit == 0x14) { /* If degrees */
+ /* Convert to radians */
+ prev = logical_extents;
+ logical_extents *= 573;
+ if (logical_extents < prev)
+ return 0;
+ unit_exponent += 1;
+ } else if (field->unit != 0x12) { /* If not radians */
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+
+ /* Apply negative unit exponent */
+ for (; unit_exponent < 0; unit_exponent++) {
+ prev = logical_extents;
+ logical_extents *= 10;
+ if (logical_extents < prev)
+ return 0;
+ }
+ /* Apply positive unit exponent */
+ for (; unit_exponent > 0; unit_exponent--) {
+ prev = physical_extents;
+ physical_extents *= 10;
+ if (physical_extents < prev)
+ return 0;
+ }
+
+ /* Calculate resolution */
+ return logical_extents / physical_extents;
+}
+
static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_field *field,
struct hid_usage *usage)
{
@@ -336,6 +413,10 @@
map_key_clear(BTN_STYLUS);
break;
+ case 0x46: /* TabletPick */
+ map_key_clear(BTN_STYLUS2);
+ break;
+
default: goto unknown;
}
break;
@@ -537,6 +618,9 @@
input_set_abs_params(input, usage->code, a, b, (b - a) >> 8, (b - a) >> 4);
else input_set_abs_params(input, usage->code, a, b, 0, 0);
+ input_abs_set_res(input, usage->code,
+ hidinput_calc_abs_res(field, usage->code));
+
/* use a larger default input buffer for MT devices */
if (usage->code == ABS_MT_POSITION_X && input->hint_events_per_packet == 0)
input_set_events_per_packet(input, 60);
@@ -659,6 +743,9 @@
{
struct hid_input *hidinput;
+ if (hid->quirks & HID_QUIRK_NO_INPUT_SYNC)
+ return;
+
list_for_each_entry(hidinput, &hid->inputs, list)
input_sync(hidinput->input);
}
diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c
index f887171..817247e 100644
--- a/drivers/hid/hid-kye.c
+++ b/drivers/hid/hid-kye.c
@@ -23,10 +23,10 @@
* - report size 8 count 1 must be size 1 count 8 for button bitfield
* - change the button usage range to 4-7 for the extra buttons
*/
-static void kye_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
- if (rsize >= 74 &&
+ if (*rsize >= 74 &&
rdesc[61] == 0x05 && rdesc[62] == 0x08 &&
rdesc[63] == 0x19 && rdesc[64] == 0x08 &&
rdesc[65] == 0x29 && rdesc[66] == 0x0f &&
@@ -40,6 +40,7 @@
rdesc[72] = 0x01;
rdesc[74] = 0x08;
}
+ return rdesc;
}
static const struct hid_device_id kye_devices[] = {
diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c
index f6433d8..b629fba 100644
--- a/drivers/hid/hid-lg.c
+++ b/drivers/hid/hid-lg.c
@@ -7,6 +7,7 @@
* Copyright (c) 2006-2007 Jiri Kosina
* Copyright (c) 2007 Paul Walmsley
* Copyright (c) 2008 Jiri Slaby
+ * Copyright (c) 2010 Hendrik Iben
*/
/*
@@ -19,6 +20,9 @@
#include <linux/device.h>
#include <linux/hid.h>
#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
#include "hid-ids.h"
#include "hid-lg.h"
@@ -35,31 +39,43 @@
#define LG_FF2 0x400
#define LG_RDESC_REL_ABS 0x800
#define LG_FF3 0x1000
+#define LG_FF4 0x2000
/*
* Certain Logitech keyboards send in report #3 keys which are far
* above the logical maximum described in descriptor. This extends
* the original value of 0x28c of logical maximum to 0x104d
*/
-static void lg_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *lg_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
unsigned long quirks = (unsigned long)hid_get_drvdata(hdev);
- if ((quirks & LG_RDESC) && rsize >= 90 && rdesc[83] == 0x26 &&
+ if ((quirks & LG_RDESC) && *rsize >= 90 && rdesc[83] == 0x26 &&
rdesc[84] == 0x8c && rdesc[85] == 0x02) {
dev_info(&hdev->dev, "fixing up Logitech keyboard report "
"descriptor\n");
rdesc[84] = rdesc[89] = 0x4d;
rdesc[85] = rdesc[90] = 0x10;
}
- if ((quirks & LG_RDESC_REL_ABS) && rsize >= 50 &&
+ if ((quirks & LG_RDESC_REL_ABS) && *rsize >= 50 &&
rdesc[32] == 0x81 && rdesc[33] == 0x06 &&
rdesc[49] == 0x81 && rdesc[50] == 0x06) {
dev_info(&hdev->dev, "fixing up rel/abs in Logitech "
"report descriptor\n");
rdesc[33] = rdesc[50] = 0x02;
}
+ if ((quirks & LG_FF4) && *rsize >= 101 &&
+ rdesc[41] == 0x95 && rdesc[42] == 0x0B &&
+ rdesc[47] == 0x05 && rdesc[48] == 0x09) {
+ dev_info(&hdev->dev, "fixing up Logitech Speed Force Wireless "
+ "button descriptor\n");
+ rdesc[41] = 0x05;
+ rdesc[42] = 0x09;
+ rdesc[47] = 0x95;
+ rdesc[48] = 0x0B;
+ }
+ return rdesc;
}
#define lg_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \
@@ -285,12 +301,33 @@
goto err_free;
}
+ if (quirks & LG_FF4) {
+ unsigned char buf[] = { 0x00, 0xAF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+
+ ret = hdev->hid_output_raw_report(hdev, buf, sizeof(buf), HID_FEATURE_REPORT);
+
+ if (ret >= 0) {
+ /* insert a little delay of 10 jiffies ~ 40ms */
+ wait_queue_head_t wait;
+ init_waitqueue_head (&wait);
+ wait_event_interruptible_timeout(wait, 0, 10);
+
+ /* Select random Address */
+ buf[1] = 0xB2;
+ get_random_bytes(&buf[2], 2);
+
+ ret = hdev->hid_output_raw_report(hdev, buf, sizeof(buf), HID_FEATURE_REPORT);
+ }
+ }
+
if (quirks & LG_FF)
lgff_init(hdev);
if (quirks & LG_FF2)
lg2ff_init(hdev);
if (quirks & LG_FF3)
lg3ff_init(hdev);
+ if (quirks & LG_FF4)
+ lg4ff_init(hdev);
return 0;
err_free:
@@ -325,6 +362,8 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WHEEL),
.driver_data = LG_NOGET | LG_FF },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD_CORD),
+ .driver_data = LG_FF2 },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD),
.driver_data = LG_FF },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2_2),
@@ -339,6 +378,8 @@
.driver_data = LG_FF },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G25_WHEEL),
.driver_data = LG_FF },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WII_WHEEL),
+ .driver_data = LG_FF4 },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG ),
.driver_data = LG_FF },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2),
diff --git a/drivers/hid/hid-lg.h b/drivers/hid/hid-lg.h
index ce2ac86..b0100ba 100644
--- a/drivers/hid/hid-lg.h
+++ b/drivers/hid/hid-lg.h
@@ -19,4 +19,10 @@
static inline int lg3ff_init(struct hid_device *hdev) { return -1; }
#endif
+#ifdef CONFIG_LOGIWII_FF
+int lg4ff_init(struct hid_device *hdev);
+#else
+static inline int lg4ff_init(struct hid_device *hdev) { return -1; }
+#endif
+
#endif
diff --git a/drivers/hid/hid-lg2ff.c b/drivers/hid/hid-lg2ff.c
index d888f1e..4258253 100644
--- a/drivers/hid/hid-lg2ff.c
+++ b/drivers/hid/hid-lg2ff.c
@@ -1,5 +1,5 @@
/*
- * Force feedback support for Logitech Rumblepad 2
+ * Force feedback support for Logitech RumblePad and Rumblepad 2
*
* Copyright (c) 2008 Anssi Hannula <anssi.hannula@gmail.com>
*/
@@ -110,7 +110,7 @@
usbhid_submit_report(hid, report, USB_DIR_OUT);
- dev_info(&hid->dev, "Force feedback for Logitech Rumblepad 2 by "
+ dev_info(&hid->dev, "Force feedback for Logitech RumblePad/Rumblepad 2 by "
"Anssi Hannula <anssi.hannula@gmail.com>\n");
return 0;
diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c
new file mode 100644
index 0000000..7eef5a2
--- /dev/null
+++ b/drivers/hid/hid-lg4ff.c
@@ -0,0 +1,136 @@
+/*
+ * Force feedback support for Logitech Speed Force Wireless
+ *
+ * http://wiibrew.org/wiki/Logitech_USB_steering_wheel
+ *
+ * Copyright (c) 2010 Simon Wood <simon@mungewell.org>
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include <linux/input.h>
+#include <linux/usb.h>
+#include <linux/hid.h>
+
+#include "usbhid/usbhid.h"
+#include "hid-lg.h"
+
+struct lg4ff_device {
+ struct hid_report *report;
+};
+
+static const signed short ff4_wheel_ac[] = {
+ FF_CONSTANT,
+ FF_AUTOCENTER,
+ -1
+};
+
+static int hid_lg4ff_play(struct input_dev *dev, void *data,
+ struct ff_effect *effect)
+{
+ struct hid_device *hid = input_get_drvdata(dev);
+ struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
+ struct hid_report *report = list_entry(report_list->next, struct hid_report, list);
+ int x;
+
+#define CLAMP(x) if (x < 0) x = 0; if (x > 0xff) x = 0xff
+
+ switch (effect->type) {
+ case FF_CONSTANT:
+ x = effect->u.ramp.start_level + 0x80; /* 0x80 is no force */
+ CLAMP(x);
+ report->field[0]->value[0] = 0x11; /* Slot 1 */
+ report->field[0]->value[1] = 0x10;
+ report->field[0]->value[2] = x;
+ report->field[0]->value[3] = 0x00;
+ report->field[0]->value[4] = 0x00;
+ report->field[0]->value[5] = 0x08;
+ report->field[0]->value[6] = 0x00;
+ dbg_hid("Autocenter, x=0x%02X\n", x);
+
+ usbhid_submit_report(hid, report, USB_DIR_OUT);
+ break;
+ }
+ return 0;
+}
+
+static void hid_lg4ff_set_autocenter(struct input_dev *dev, u16 magnitude)
+{
+ struct hid_device *hid = input_get_drvdata(dev);
+ struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
+ struct hid_report *report = list_entry(report_list->next, struct hid_report, list);
+ __s32 *value = report->field[0]->value;
+
+ *value++ = 0xfe;
+ *value++ = 0x0d;
+ *value++ = 0x07;
+ *value++ = 0x07;
+ *value++ = (magnitude >> 8) & 0xff;
+ *value++ = 0x00;
+ *value = 0x00;
+
+ usbhid_submit_report(hid, report, USB_DIR_OUT);
+}
+
+
+int lg4ff_init(struct hid_device *hid)
+{
+ struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+ struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
+ struct input_dev *dev = hidinput->input;
+ struct hid_report *report;
+ struct hid_field *field;
+ const signed short *ff_bits = ff4_wheel_ac;
+ int error;
+ int i;
+
+ /* Find the report to use */
+ if (list_empty(report_list)) {
+ err_hid("No output report found");
+ return -1;
+ }
+
+ /* Check that the report looks ok */
+ report = list_entry(report_list->next, struct hid_report, list);
+ if (!report) {
+ err_hid("NULL output report");
+ return -1;
+ }
+
+ field = report->field[0];
+ if (!field) {
+ err_hid("NULL field");
+ return -1;
+ }
+
+ for (i = 0; ff_bits[i] >= 0; i++)
+ set_bit(ff_bits[i], dev->ffbit);
+
+ error = input_ff_create_memless(dev, NULL, hid_lg4ff_play);
+
+ if (error)
+ return error;
+
+ if (test_bit(FF_AUTOCENTER, dev->ffbit))
+ dev->ff->set_autocenter = hid_lg4ff_set_autocenter;
+
+ dev_info(&hid->dev, "Force feedback for Logitech Speed Force Wireless by "
+ "Simon Wood <simon@mungewell.org>\n");
+ return 0;
+}
+
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index 319b0e5..e6dc151 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -2,6 +2,7 @@
* Apple "Magic" Wireless Mouse driver
*
* Copyright (c) 2010 Michael Poole <mdpoole@troilus.org>
+ * Copyright (c) 2010 Chase Douglas <chase.douglas@canonical.com>
*/
/*
@@ -53,7 +54,9 @@
module_param(report_undeciphered, bool, 0644);
MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state field using a MSC_RAW event");
-#define TOUCH_REPORT_ID 0x29
+#define TRACKPAD_REPORT_ID 0x28
+#define MOUSE_REPORT_ID 0x29
+#define DOUBLE_REPORT_ID 0xf7
/* These definitions are not precise, but they're close enough. (Bits
* 0x03 seem to indicate the aspect ratio of the touch, bits 0x70 seem
* to be some kind of bit mask -- 0x20 may be a near-field reading,
@@ -67,15 +70,19 @@
#define SCROLL_ACCEL_DEFAULT 7
+/* Single touch emulation should only begin when no touches are currently down.
+ * This is true when single_touch_id is equal to NO_TOUCHES. If multiple touches
+ * are down and the touch providing for single touch emulation is lifted,
+ * single_touch_id is equal to SINGLE_TOUCH_UP. While single touch emulation is
+ * occuring, single_touch_id corresponds with the tracking id of the touch used.
+ */
+#define NO_TOUCHES -1
+#define SINGLE_TOUCH_UP -2
+
/**
* struct magicmouse_sc - Tracks Magic Mouse-specific data.
* @input: Input device through which we report events.
* @quirks: Currently unused.
- * @last_timestamp: Timestamp from most recent (18-bit) touch report
- * (units of milliseconds over short windows, but seems to
- * increase faster when there are no touches).
- * @delta_time: 18-bit difference between the two most recent touch
- * reports from the mouse.
* @ntouches: Number of touches in most recent touch report.
* @scroll_accel: Number of consecutive scroll motions.
* @scroll_jiffies: Time of last scroll motion.
@@ -86,8 +93,6 @@
struct input_dev *input;
unsigned long quirks;
- int last_timestamp;
- int delta_time;
int ntouches;
int scroll_accel;
unsigned long scroll_jiffies;
@@ -98,9 +103,9 @@
short scroll_x;
short scroll_y;
u8 size;
- u8 down;
} touches[16];
int tracking_ids[16];
+ int single_touch_id;
};
static int magicmouse_firm_touch(struct magicmouse_sc *msc)
@@ -166,18 +171,35 @@
static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tdata)
{
struct input_dev *input = msc->input;
- __s32 x_y = tdata[0] << 8 | tdata[1] << 16 | tdata[2] << 24;
- int misc = tdata[5] | tdata[6] << 8;
- int id = (misc >> 6) & 15;
- int x = x_y << 12 >> 20;
- int y = -(x_y >> 20);
- int down = (tdata[7] & TOUCH_STATE_MASK) != TOUCH_STATE_NONE;
+ int id, x, y, size, orientation, touch_major, touch_minor, state, down;
+
+ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
+ id = (tdata[6] << 2 | tdata[5] >> 6) & 0xf;
+ x = (tdata[1] << 28 | tdata[0] << 20) >> 20;
+ y = -((tdata[2] << 24 | tdata[1] << 16) >> 20);
+ size = tdata[5] & 0x3f;
+ orientation = (tdata[6] >> 2) - 32;
+ touch_major = tdata[3];
+ touch_minor = tdata[4];
+ state = tdata[7] & TOUCH_STATE_MASK;
+ down = state != TOUCH_STATE_NONE;
+ } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
+ id = (tdata[7] << 2 | tdata[6] >> 6) & 0xf;
+ x = (tdata[1] << 27 | tdata[0] << 19) >> 19;
+ y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19);
+ size = tdata[6] & 0x3f;
+ orientation = (tdata[7] >> 2) - 32;
+ touch_major = tdata[4];
+ touch_minor = tdata[5];
+ state = tdata[8] & TOUCH_STATE_MASK;
+ down = state != TOUCH_STATE_NONE;
+ }
/* Store tracking ID and other fields. */
msc->tracking_ids[raw_id] = id;
msc->touches[id].x = x;
msc->touches[id].y = y;
- msc->touches[id].size = misc & 63;
+ msc->touches[id].size = size;
/* If requested, emulate a scroll wheel by detecting small
* vertical touch motions.
@@ -188,7 +210,7 @@
int step_y = msc->touches[id].scroll_y - y;
/* Calculate and apply the scroll motion. */
- switch (tdata[7] & TOUCH_STATE_MASK) {
+ switch (state) {
case TOUCH_STATE_START:
msc->touches[id].scroll_x = x;
msc->touches[id].scroll_y = y;
@@ -222,21 +244,28 @@
}
}
+ if (down) {
+ msc->ntouches++;
+ if (msc->single_touch_id == NO_TOUCHES)
+ msc->single_touch_id = id;
+ } else if (msc->single_touch_id == id)
+ msc->single_touch_id = SINGLE_TOUCH_UP;
+
/* Generate the input events for this touch. */
if (report_touches && down) {
- int orientation = (misc >> 10) - 32;
-
- msc->touches[id].down = 1;
-
input_report_abs(input, ABS_MT_TRACKING_ID, id);
- input_report_abs(input, ABS_MT_TOUCH_MAJOR, tdata[3]);
- input_report_abs(input, ABS_MT_TOUCH_MINOR, tdata[4]);
+ input_report_abs(input, ABS_MT_TOUCH_MAJOR, touch_major << 2);
+ input_report_abs(input, ABS_MT_TOUCH_MINOR, touch_minor << 2);
input_report_abs(input, ABS_MT_ORIENTATION, orientation);
input_report_abs(input, ABS_MT_POSITION_X, x);
input_report_abs(input, ABS_MT_POSITION_Y, y);
- if (report_undeciphered)
- input_event(input, EV_MSC, MSC_RAW, tdata[7]);
+ if (report_undeciphered) {
+ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE)
+ input_event(input, EV_MSC, MSC_RAW, tdata[7]);
+ else /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
+ input_event(input, EV_MSC, MSC_RAW, tdata[8]);
+ }
input_mt_sync(input);
}
@@ -247,39 +276,43 @@
{
struct magicmouse_sc *msc = hid_get_drvdata(hdev);
struct input_dev *input = msc->input;
- int x, y, ts, ii, clicks, last_up;
+ int x = 0, y = 0, ii, clicks = 0, npoints;
switch (data[0]) {
- case 0x10:
- if (size != 6)
+ case TRACKPAD_REPORT_ID:
+ /* Expect four bytes of prefix, and N*9 bytes of touch data. */
+ if (size < 4 || ((size - 4) % 9) != 0)
return 0;
- x = (__s16)(data[2] | data[3] << 8);
- y = (__s16)(data[4] | data[5] << 8);
+ npoints = (size - 4) / 9;
+ msc->ntouches = 0;
+ for (ii = 0; ii < npoints; ii++)
+ magicmouse_emit_touch(msc, ii, data + ii * 9 + 4);
+
+ /* We don't need an MT sync here because trackpad emits a
+ * BTN_TOUCH event in a new frame when all touches are released.
+ */
+ if (msc->ntouches == 0)
+ msc->single_touch_id = NO_TOUCHES;
+
clicks = data[1];
+
+ /* The following bits provide a device specific timestamp. They
+ * are unused here.
+ *
+ * ts = data[1] >> 6 | data[2] << 2 | data[3] << 10;
+ */
break;
- case TOUCH_REPORT_ID:
+ case MOUSE_REPORT_ID:
/* Expect six bytes of prefix, and N*8 bytes of touch data. */
if (size < 6 || ((size - 6) % 8) != 0)
return 0;
- ts = data[3] >> 6 | data[4] << 2 | data[5] << 10;
- msc->delta_time = (ts - msc->last_timestamp) & 0x3ffff;
- msc->last_timestamp = ts;
- msc->ntouches = (size - 6) / 8;
- for (ii = 0; ii < msc->ntouches; ii++)
+ npoints = (size - 6) / 8;
+ msc->ntouches = 0;
+ for (ii = 0; ii < npoints; ii++)
magicmouse_emit_touch(msc, ii, data + ii * 8 + 6);
- if (report_touches) {
- last_up = 1;
- for (ii = 0; ii < ARRAY_SIZE(msc->touches); ii++) {
- if (msc->touches[ii].down) {
- last_up = 0;
- msc->touches[ii].down = 0;
- }
- }
- if (last_up) {
- input_mt_sync(input);
- }
- }
+ if (report_touches && msc->ntouches == 0)
+ input_mt_sync(input);
/* When emulating three-button mode, it is important
* to have the current touch information before
@@ -288,68 +321,72 @@
x = (int)(((data[3] & 0x0c) << 28) | (data[1] << 22)) >> 22;
y = (int)(((data[3] & 0x30) << 26) | (data[2] << 22)) >> 22;
clicks = data[3];
+
+ /* The following bits provide a device specific timestamp. They
+ * are unused here.
+ *
+ * ts = data[3] >> 6 | data[4] << 2 | data[5] << 10;
+ */
break;
- case 0x20: /* Theoretically battery status (0-100), but I have
- * never seen it -- maybe it is only upon request.
- */
- case 0x60: /* Unknown, maybe laser on/off. */
- case 0x61: /* Laser reflection status change.
- * data[1]: 0 = spotted, 1 = lost
- */
+ case DOUBLE_REPORT_ID:
+ /* Sometimes the trackpad sends two touch reports in one
+ * packet.
+ */
+ magicmouse_raw_event(hdev, report, data + 2, data[1]);
+ magicmouse_raw_event(hdev, report, data + 2 + data[1],
+ size - 2 - data[1]);
+ break;
default:
return 0;
}
- magicmouse_emit_buttons(msc, clicks & 3);
- input_report_rel(input, REL_X, x);
- input_report_rel(input, REL_Y, y);
+ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
+ magicmouse_emit_buttons(msc, clicks & 3);
+ input_report_rel(input, REL_X, x);
+ input_report_rel(input, REL_Y, y);
+ } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
+ input_report_key(input, BTN_MOUSE, clicks & 1);
+ input_report_key(input, BTN_TOUCH, msc->ntouches > 0);
+ input_report_key(input, BTN_TOOL_FINGER, msc->ntouches == 1);
+ input_report_key(input, BTN_TOOL_DOUBLETAP, msc->ntouches == 2);
+ input_report_key(input, BTN_TOOL_TRIPLETAP, msc->ntouches == 3);
+ input_report_key(input, BTN_TOOL_QUADTAP, msc->ntouches == 4);
+ if (msc->single_touch_id >= 0) {
+ input_report_abs(input, ABS_X,
+ msc->touches[msc->single_touch_id].x);
+ input_report_abs(input, ABS_Y,
+ msc->touches[msc->single_touch_id].y);
+ }
+ }
+
input_sync(input);
return 1;
}
-static int magicmouse_input_open(struct input_dev *dev)
-{
- struct hid_device *hid = input_get_drvdata(dev);
-
- return hid->ll_driver->open(hid);
-}
-
-static void magicmouse_input_close(struct input_dev *dev)
-{
- struct hid_device *hid = input_get_drvdata(dev);
-
- hid->ll_driver->close(hid);
-}
-
static void magicmouse_setup_input(struct input_dev *input, struct hid_device *hdev)
{
- input_set_drvdata(input, hdev);
- input->event = hdev->ll_driver->hidinput_input_event;
- input->open = magicmouse_input_open;
- input->close = magicmouse_input_close;
-
- input->name = hdev->name;
- input->phys = hdev->phys;
- input->uniq = hdev->uniq;
- input->id.bustype = hdev->bus;
- input->id.vendor = hdev->vendor;
- input->id.product = hdev->product;
- input->id.version = hdev->version;
- input->dev.parent = hdev->dev.parent;
-
__set_bit(EV_KEY, input->evbit);
- __set_bit(BTN_LEFT, input->keybit);
- __set_bit(BTN_RIGHT, input->keybit);
- if (emulate_3button)
- __set_bit(BTN_MIDDLE, input->keybit);
- __set_bit(BTN_TOOL_FINGER, input->keybit);
- __set_bit(EV_REL, input->evbit);
- __set_bit(REL_X, input->relbit);
- __set_bit(REL_Y, input->relbit);
- if (emulate_scroll_wheel) {
- __set_bit(REL_WHEEL, input->relbit);
- __set_bit(REL_HWHEEL, input->relbit);
+ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
+ __set_bit(BTN_LEFT, input->keybit);
+ __set_bit(BTN_RIGHT, input->keybit);
+ if (emulate_3button)
+ __set_bit(BTN_MIDDLE, input->keybit);
+
+ __set_bit(EV_REL, input->evbit);
+ __set_bit(REL_X, input->relbit);
+ __set_bit(REL_Y, input->relbit);
+ if (emulate_scroll_wheel) {
+ __set_bit(REL_WHEEL, input->relbit);
+ __set_bit(REL_HWHEEL, input->relbit);
+ }
+ } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
+ __set_bit(BTN_MOUSE, input->keybit);
+ __set_bit(BTN_TOOL_FINGER, input->keybit);
+ __set_bit(BTN_TOOL_DOUBLETAP, input->keybit);
+ __set_bit(BTN_TOOL_TRIPLETAP, input->keybit);
+ __set_bit(BTN_TOOL_QUADTAP, input->keybit);
+ __set_bit(BTN_TOUCH, input->keybit);
}
if (report_touches) {
@@ -359,16 +396,26 @@
input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 255, 4, 0);
input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, 255, 4, 0);
input_set_abs_params(input, ABS_MT_ORIENTATION, -32, 31, 1, 0);
- input_set_abs_params(input, ABS_MT_POSITION_X, -1100, 1358,
- 4, 0);
+
/* Note: Touch Y position from the device is inverted relative
* to how pointer motion is reported (and relative to how USB
* HID recommends the coordinates work). This driver keeps
* the origin at the same position, and just uses the additive
* inverse of the reported Y.
*/
- input_set_abs_params(input, ABS_MT_POSITION_Y, -1589, 2047,
- 4, 0);
+ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
+ input_set_abs_params(input, ABS_MT_POSITION_X, -1100,
+ 1358, 4, 0);
+ input_set_abs_params(input, ABS_MT_POSITION_Y, -1589,
+ 2047, 4, 0);
+ } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
+ input_set_abs_params(input, ABS_X, -2909, 3167, 4, 0);
+ input_set_abs_params(input, ABS_Y, -2456, 2565, 4, 0);
+ input_set_abs_params(input, ABS_MT_POSITION_X, -2909,
+ 3167, 4, 0);
+ input_set_abs_params(input, ABS_MT_POSITION_Y, -2456,
+ 2565, 4, 0);
+ }
}
if (report_undeciphered) {
@@ -377,12 +424,22 @@
}
}
+static int magicmouse_input_mapping(struct hid_device *hdev,
+ struct hid_input *hi, struct hid_field *field,
+ struct hid_usage *usage, unsigned long **bit, int *max)
+{
+ struct magicmouse_sc *msc = hid_get_drvdata(hdev);
+
+ if (!msc->input)
+ msc->input = hi->input;
+
+ return 0;
+}
+
static int magicmouse_probe(struct hid_device *hdev,
const struct hid_device_id *id)
{
- __u8 feature_1[] = { 0xd7, 0x01 };
- __u8 feature_2[] = { 0xf8, 0x01, 0x32 };
- struct input_dev *input;
+ __u8 feature[] = { 0xd7, 0x01 };
struct magicmouse_sc *msc;
struct hid_report *report;
int ret;
@@ -398,6 +455,8 @@
msc->quirks = id->driver_data;
hid_set_drvdata(hdev, msc);
+ msc->single_touch_id = NO_TOUCHES;
+
ret = hid_parse(hdev);
if (ret) {
dev_err(&hdev->dev, "magicmouse hid parse failed\n");
@@ -410,10 +469,22 @@
goto err_free;
}
- /* we are handling the input ourselves */
- hidinput_disconnect(hdev);
+ /* We do this after hid-input is done parsing reports so that
+ * hid-input uses the most natural button and axis IDs.
+ */
+ if (msc->input)
+ magicmouse_setup_input(msc->input, hdev);
- report = hid_register_report(hdev, HID_INPUT_REPORT, TOUCH_REPORT_ID);
+ if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE)
+ report = hid_register_report(hdev, HID_INPUT_REPORT,
+ MOUSE_REPORT_ID);
+ else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
+ report = hid_register_report(hdev, HID_INPUT_REPORT,
+ TRACKPAD_REPORT_ID);
+ report = hid_register_report(hdev, HID_INPUT_REPORT,
+ DOUBLE_REPORT_ID);
+ }
+
if (!report) {
dev_err(&hdev->dev, "unable to register touch report\n");
ret = -ENOMEM;
@@ -421,39 +492,15 @@
}
report->size = 6;
- ret = hdev->hid_output_raw_report(hdev, feature_1, sizeof(feature_1),
+ ret = hdev->hid_output_raw_report(hdev, feature, sizeof(feature),
HID_FEATURE_REPORT);
- if (ret != sizeof(feature_1)) {
- dev_err(&hdev->dev, "unable to request touch data (1:%d)\n",
+ if (ret != sizeof(feature)) {
+ dev_err(&hdev->dev, "unable to request touch data (%d)\n",
ret);
goto err_stop_hw;
}
- ret = hdev->hid_output_raw_report(hdev, feature_2,
- sizeof(feature_2), HID_FEATURE_REPORT);
- if (ret != sizeof(feature_2)) {
- dev_err(&hdev->dev, "unable to request touch data (2:%d)\n",
- ret);
- goto err_stop_hw;
- }
-
- input = input_allocate_device();
- if (!input) {
- dev_err(&hdev->dev, "can't alloc input device\n");
- ret = -ENOMEM;
- goto err_stop_hw;
- }
- magicmouse_setup_input(input, hdev);
-
- ret = input_register_device(input);
- if (ret) {
- dev_err(&hdev->dev, "input device registration failed\n");
- goto err_input;
- }
- msc->input = input;
return 0;
-err_input:
- input_free_device(input);
err_stop_hw:
hid_hw_stop(hdev);
err_free:
@@ -466,13 +513,14 @@
struct magicmouse_sc *msc = hid_get_drvdata(hdev);
hid_hw_stop(hdev);
- input_unregister_device(msc->input);
kfree(msc);
}
static const struct hid_device_id magic_mice[] = {
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE),
- .driver_data = 0 },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
+ USB_DEVICE_ID_APPLE_MAGICMOUSE), .driver_data = 0 },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
+ USB_DEVICE_ID_APPLE_MAGICTRACKPAD), .driver_data = 0 },
{ }
};
MODULE_DEVICE_TABLE(hid, magic_mice);
@@ -483,6 +531,7 @@
.probe = magicmouse_probe,
.remove = magicmouse_remove,
.raw_event = magicmouse_raw_event,
+ .input_mapping = magicmouse_input_mapping,
};
static int __init magicmouse_init(void)
diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c
index 359cc44..dc618c3 100644
--- a/drivers/hid/hid-microsoft.c
+++ b/drivers/hid/hid-microsoft.c
@@ -33,18 +33,19 @@
* Microsoft Wireless Desktop Receiver (Model 1028) has
* 'Usage Min/Max' where it ought to have 'Physical Min/Max'
*/
-static void ms_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *ms_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
unsigned long quirks = (unsigned long)hid_get_drvdata(hdev);
- if ((quirks & MS_RDESC) && rsize == 571 && rdesc[557] == 0x19 &&
+ if ((quirks & MS_RDESC) && *rsize == 571 && rdesc[557] == 0x19 &&
rdesc[559] == 0x29) {
dev_info(&hdev->dev, "fixing up Microsoft Wireless Receiver "
"Model 1028 report descriptor\n");
rdesc[557] = 0x35;
rdesc[559] = 0x45;
}
+ return rdesc;
}
#define ms_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \
diff --git a/drivers/hid/hid-monterey.c b/drivers/hid/hid-monterey.c
index 2cd05aa..c95c31e 100644
--- a/drivers/hid/hid-monterey.c
+++ b/drivers/hid/hid-monterey.c
@@ -22,14 +22,15 @@
#include "hid-ids.h"
-static void mr_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *mr_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
- if (rsize >= 30 && rdesc[29] == 0x05 && rdesc[30] == 0x09) {
+ if (*rsize >= 30 && rdesc[29] == 0x05 && rdesc[30] == 0x09) {
dev_info(&hdev->dev, "fixing up button/consumer in HID report "
"descriptor\n");
rdesc[30] = 0x0c;
}
+ return rdesc;
}
#define mr_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \
diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c
index fb69b8c..69169ef 100644
--- a/drivers/hid/hid-ntrig.c
+++ b/drivers/hid/hid-ntrig.c
@@ -90,6 +90,55 @@
};
+/*
+ * This function converts the 4 byte raw firmware code into
+ * a string containing 5 comma separated numbers.
+ */
+static int ntrig_version_string(unsigned char *raw, char *buf)
+{
+ __u8 a = (raw[1] & 0x0e) >> 1;
+ __u8 b = (raw[0] & 0x3c) >> 2;
+ __u8 c = ((raw[0] & 0x03) << 3) | ((raw[3] & 0xe0) >> 5);
+ __u8 d = ((raw[3] & 0x07) << 3) | ((raw[2] & 0xe0) >> 5);
+ __u8 e = raw[2] & 0x07;
+
+ /*
+ * As yet unmapped bits:
+ * 0b11000000 0b11110001 0b00011000 0b00011000
+ */
+
+ return sprintf(buf, "%u.%u.%u.%u.%u", a, b, c, d, e);
+}
+
+static void ntrig_report_version(struct hid_device *hdev)
+{
+ int ret;
+ char buf[20];
+ struct usb_device *usb_dev = hid_to_usb_dev(hdev);
+ unsigned char *data = kmalloc(8, GFP_KERNEL);
+
+ if (!data)
+ goto err_free;
+
+ ret = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0),
+ USB_REQ_CLEAR_FEATURE,
+ USB_TYPE_CLASS | USB_RECIP_INTERFACE |
+ USB_DIR_IN,
+ 0x30c, 1, data, 8,
+ USB_CTRL_SET_TIMEOUT);
+
+ if (ret == 8) {
+ ret = ntrig_version_string(&data[2], buf);
+
+ dev_info(&hdev->dev,
+ "Firmware version: %s (%02x%02x %02x%02x)\n",
+ buf, data[2], data[3], data[4], data[5]);
+ }
+
+err_free:
+ kfree(data);
+}
+
static ssize_t show_phys_width(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -377,8 +426,8 @@
*/
static int ntrig_input_mapping(struct hid_device *hdev, struct hid_input *hi,
- struct hid_field *field, struct hid_usage *usage,
- unsigned long **bit, int *max)
+ struct hid_field *field, struct hid_usage *usage,
+ unsigned long **bit, int *max)
{
struct ntrig_data *nd = hid_get_drvdata(hdev);
@@ -448,13 +497,13 @@
/* width/height mapped on TouchMajor/TouchMinor/Orientation */
case HID_DG_WIDTH:
hid_map_usage(hi, usage, bit, max,
- EV_ABS, ABS_MT_TOUCH_MAJOR);
+ EV_ABS, ABS_MT_TOUCH_MAJOR);
return 1;
case HID_DG_HEIGHT:
hid_map_usage(hi, usage, bit, max,
- EV_ABS, ABS_MT_TOUCH_MINOR);
+ EV_ABS, ABS_MT_TOUCH_MINOR);
input_set_abs_params(hi->input, ABS_MT_ORIENTATION,
- 0, 1, 0, 0);
+ 0, 1, 0, 0);
return 1;
}
return 0;
@@ -468,8 +517,8 @@
}
static int ntrig_input_mapped(struct hid_device *hdev, struct hid_input *hi,
- struct hid_field *field, struct hid_usage *usage,
- unsigned long **bit, int *max)
+ struct hid_field *field, struct hid_usage *usage,
+ unsigned long **bit, int *max)
{
/* No special mappings needed for the pen and single touch */
if (field->physical)
@@ -489,7 +538,7 @@
* and call input_mt_sync after each point if necessary
*/
static int ntrig_event (struct hid_device *hid, struct hid_field *field,
- struct hid_usage *usage, __s32 value)
+ struct hid_usage *usage, __s32 value)
{
struct input_dev *input = field->hidinput->input;
struct ntrig_data *nd = hid_get_drvdata(hid);
@@ -848,6 +897,8 @@
if (report)
usbhid_submit_report(hdev, report, USB_DIR_OUT);
+ ntrig_report_version(hdev);
+
ret = sysfs_create_group(&hdev->dev.kobj,
&ntrig_attribute_group);
@@ -860,7 +911,7 @@
static void ntrig_remove(struct hid_device *hdev)
{
sysfs_remove_group(&hdev->dev.kobj,
- &ntrig_attribute_group);
+ &ntrig_attribute_group);
hid_hw_stop(hdev);
kfree(hid_get_drvdata(hdev));
}
diff --git a/drivers/hid/hid-ortek.c b/drivers/hid/hid-ortek.c
index aa9a960..2e79716 100644
--- a/drivers/hid/hid-ortek.c
+++ b/drivers/hid/hid-ortek.c
@@ -19,14 +19,15 @@
#include "hid-ids.h"
-static void ortek_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *ortek_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
- if (rsize >= 56 && rdesc[54] == 0x25 && rdesc[55] == 0x01) {
+ if (*rsize >= 56 && rdesc[54] == 0x25 && rdesc[55] == 0x01) {
dev_info(&hdev->dev, "Fixing up Ortek WKB-2000 "
"report descriptor.\n");
rdesc[55] = 0x92;
}
+ return rdesc;
}
static const struct hid_device_id ortek_devices[] = {
diff --git a/drivers/hid/hid-petalynx.c b/drivers/hid/hid-petalynx.c
index 500fbd0..308d6ae 100644
--- a/drivers/hid/hid-petalynx.c
+++ b/drivers/hid/hid-petalynx.c
@@ -23,10 +23,10 @@
#include "hid-ids.h"
/* Petalynx Maxter Remote has maximum for consumer page set too low */
-static void pl_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *pl_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
- if (rsize >= 60 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 &&
+ if (*rsize >= 60 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 &&
rdesc[41] == 0x00 && rdesc[59] == 0x26 &&
rdesc[60] == 0xf9 && rdesc[61] == 0x00) {
dev_info(&hdev->dev, "fixing up Petalynx Maxter Remote report "
@@ -34,6 +34,7 @@
rdesc[60] = 0xfa;
rdesc[40] = 0xfa;
}
+ return rdesc;
}
#define pl_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \
diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c
index 845f428..48eab84 100644
--- a/drivers/hid/hid-prodikeys.c
+++ b/drivers/hid/hid-prodikeys.c
@@ -740,10 +740,10 @@
/*
* PC-MIDI report descriptor for report id is wrong.
*/
-static void pk_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *pk_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
- if (rsize == 178 &&
+ if (*rsize == 178 &&
rdesc[111] == 0x06 && rdesc[112] == 0x00 &&
rdesc[113] == 0xff) {
dev_info(&hdev->dev, "fixing up pc-midi keyboard report "
@@ -751,6 +751,7 @@
rdesc[144] = 0x18; /* report 4: was 0x10 report count */
}
+ return rdesc;
}
static int pk_input_mapping(struct hid_device *hdev, struct hid_input *hi,
diff --git a/drivers/hid/hid-roccat-pyra.c b/drivers/hid/hid-roccat-pyra.c
new file mode 100644
index 0000000..9bf2304
--- /dev/null
+++ b/drivers/hid/hid-roccat-pyra.c
@@ -0,0 +1,968 @@
+/*
+ * Roccat Pyra driver for Linux
+ *
+ * Copyright (c) 2010 Stefan Achatz <erazor_de@users.sourceforge.net>
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+/*
+ * Roccat Pyra is a mobile gamer mouse which comes in wired and wireless
+ * variant. Wireless variant is not tested.
+ * Userland tools can be found at http://sourceforge.net/projects/roccat
+ */
+
+#include <linux/device.h>
+#include <linux/input.h>
+#include <linux/hid.h>
+#include <linux/usb.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include "hid-ids.h"
+#include "hid-roccat.h"
+#include "hid-roccat-pyra.h"
+
+static void profile_activated(struct pyra_device *pyra,
+ unsigned int new_profile)
+{
+ pyra->actual_profile = new_profile;
+ pyra->actual_cpi = pyra->profile_settings[pyra->actual_profile].y_cpi;
+}
+
+static int pyra_send_control(struct usb_device *usb_dev, int value,
+ enum pyra_control_requests request)
+{
+ int len;
+ struct pyra_control control;
+
+ if ((request == PYRA_CONTROL_REQUEST_PROFILE_SETTINGS ||
+ request == PYRA_CONTROL_REQUEST_PROFILE_BUTTONS) &&
+ (value < 0 || value > 4))
+ return -EINVAL;
+
+ control.command = PYRA_COMMAND_CONTROL;
+ control.value = value;
+ control.request = request;
+
+ len = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0),
+ USB_REQ_SET_CONFIGURATION,
+ USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT,
+ PYRA_USB_COMMAND_CONTROL, 0, (char *)&control,
+ sizeof(struct pyra_control),
+ USB_CTRL_SET_TIMEOUT);
+
+ if (len != sizeof(struct pyra_control))
+ return len;
+
+ return 0;
+}
+
+static int pyra_receive_control_status(struct usb_device *usb_dev)
+{
+ int len;
+ struct pyra_control control;
+
+ do {
+ msleep(10);
+
+ len = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0),
+ USB_REQ_CLEAR_FEATURE,
+ USB_TYPE_CLASS | USB_RECIP_INTERFACE |
+ USB_DIR_IN,
+ PYRA_USB_COMMAND_CONTROL, 0, (char *)&control,
+ sizeof(struct pyra_control),
+ USB_CTRL_SET_TIMEOUT);
+
+ /* requested too early, try again */
+ } while (len == -EPROTO);
+
+ if (len == sizeof(struct pyra_control) &&
+ control.command == PYRA_COMMAND_CONTROL &&
+ control.request == PYRA_CONTROL_REQUEST_STATUS &&
+ control.value == 1)
+ return 0;
+ else {
+ dev_err(&usb_dev->dev, "receive control status: "
+ "unknown response 0x%x 0x%x\n",
+ control.request, control.value);
+ return -EINVAL;
+ }
+}
+
+static int pyra_get_profile_settings(struct usb_device *usb_dev,
+ struct pyra_profile_settings *buf, int number)
+{
+ int retval;
+
+ retval = pyra_send_control(usb_dev, number,
+ PYRA_CONTROL_REQUEST_PROFILE_SETTINGS);
+
+ if (retval)
+ return retval;
+
+ retval = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0),
+ USB_REQ_CLEAR_FEATURE,
+ USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
+ PYRA_USB_COMMAND_PROFILE_SETTINGS, 0, (char *)buf,
+ sizeof(struct pyra_profile_settings),
+ USB_CTRL_SET_TIMEOUT);
+
+ if (retval != sizeof(struct pyra_profile_settings))
+ return retval;
+
+ return 0;
+}
+
+static int pyra_get_profile_buttons(struct usb_device *usb_dev,
+ struct pyra_profile_buttons *buf, int number)
+{
+ int retval;
+
+ retval = pyra_send_control(usb_dev, number,
+ PYRA_CONTROL_REQUEST_PROFILE_BUTTONS);
+
+ if (retval)
+ return retval;
+
+ retval = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0),
+ USB_REQ_CLEAR_FEATURE,
+ USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
+ PYRA_USB_COMMAND_PROFILE_BUTTONS, 0, (char *)buf,
+ sizeof(struct pyra_profile_buttons),
+ USB_CTRL_SET_TIMEOUT);
+
+ if (retval != sizeof(struct pyra_profile_buttons))
+ return retval;
+
+ return 0;
+}
+
+static int pyra_get_settings(struct usb_device *usb_dev,
+ struct pyra_settings *buf)
+{
+ int len;
+ len = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0),
+ USB_REQ_CLEAR_FEATURE,
+ USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
+ PYRA_USB_COMMAND_SETTINGS, 0, buf,
+ sizeof(struct pyra_settings), USB_CTRL_SET_TIMEOUT);
+ if (len != sizeof(struct pyra_settings))
+ return -EIO;
+ return 0;
+}
+
+static int pyra_get_info(struct usb_device *usb_dev, struct pyra_info *buf)
+{
+ int len;
+ len = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0),
+ USB_REQ_CLEAR_FEATURE,
+ USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
+ PYRA_USB_COMMAND_INFO, 0, buf,
+ sizeof(struct pyra_info), USB_CTRL_SET_TIMEOUT);
+ if (len != sizeof(struct pyra_info))
+ return -EIO;
+ return 0;
+}
+
+static int pyra_set_profile_settings(struct usb_device *usb_dev,
+ struct pyra_profile_settings const *settings)
+{
+ int len;
+ len = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0),
+ USB_REQ_SET_CONFIGURATION,
+ USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT,
+ PYRA_USB_COMMAND_PROFILE_SETTINGS, 0, (char *)settings,
+ sizeof(struct pyra_profile_settings),
+ USB_CTRL_SET_TIMEOUT);
+ if (len != sizeof(struct pyra_profile_settings))
+ return -EIO;
+ if (pyra_receive_control_status(usb_dev))
+ return -EIO;
+ return 0;
+}
+
+static int pyra_set_profile_buttons(struct usb_device *usb_dev,
+ struct pyra_profile_buttons const *buttons)
+{
+ int len;
+ len = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0),
+ USB_REQ_SET_CONFIGURATION,
+ USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT,
+ PYRA_USB_COMMAND_PROFILE_BUTTONS, 0, (char *)buttons,
+ sizeof(struct pyra_profile_buttons),
+ USB_CTRL_SET_TIMEOUT);
+ if (len != sizeof(struct pyra_profile_buttons))
+ return -EIO;
+ if (pyra_receive_control_status(usb_dev))
+ return -EIO;
+ return 0;
+}
+
+static int pyra_set_settings(struct usb_device *usb_dev,
+ struct pyra_settings const *settings)
+{
+ int len;
+ len = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0),
+ USB_REQ_SET_CONFIGURATION,
+ USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT,
+ PYRA_USB_COMMAND_SETTINGS, 0, (char *)settings,
+ sizeof(struct pyra_settings), USB_CTRL_SET_TIMEOUT);
+ if (len != sizeof(struct pyra_settings))
+ return -EIO;
+ if (pyra_receive_control_status(usb_dev))
+ return -EIO;
+ return 0;
+}
+
+static ssize_t pyra_sysfs_read_profilex_settings(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count, int number)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev));
+
+ if (off >= sizeof(struct pyra_profile_settings))
+ return 0;
+
+ if (off + count > sizeof(struct pyra_profile_settings))
+ count = sizeof(struct pyra_profile_settings) - off;
+
+ mutex_lock(&pyra->pyra_lock);
+ memcpy(buf, ((char const *)&pyra->profile_settings[number]) + off,
+ count);
+ mutex_unlock(&pyra->pyra_lock);
+
+ return count;
+}
+
+static ssize_t pyra_sysfs_read_profile1_settings(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return pyra_sysfs_read_profilex_settings(fp, kobj,
+ attr, buf, off, count, 0);
+}
+
+static ssize_t pyra_sysfs_read_profile2_settings(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return pyra_sysfs_read_profilex_settings(fp, kobj,
+ attr, buf, off, count, 1);
+}
+
+static ssize_t pyra_sysfs_read_profile3_settings(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return pyra_sysfs_read_profilex_settings(fp, kobj,
+ attr, buf, off, count, 2);
+}
+
+static ssize_t pyra_sysfs_read_profile4_settings(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return pyra_sysfs_read_profilex_settings(fp, kobj,
+ attr, buf, off, count, 3);
+}
+
+static ssize_t pyra_sysfs_read_profile5_settings(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return pyra_sysfs_read_profilex_settings(fp, kobj,
+ attr, buf, off, count, 4);
+}
+
+static ssize_t pyra_sysfs_read_profilex_buttons(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count, int number)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev));
+
+ if (off >= sizeof(struct pyra_profile_buttons))
+ return 0;
+
+ if (off + count > sizeof(struct pyra_profile_buttons))
+ count = sizeof(struct pyra_profile_buttons) - off;
+
+ mutex_lock(&pyra->pyra_lock);
+ memcpy(buf, ((char const *)&pyra->profile_buttons[number]) + off,
+ count);
+ mutex_unlock(&pyra->pyra_lock);
+
+ return count;
+}
+
+static ssize_t pyra_sysfs_read_profile1_buttons(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return pyra_sysfs_read_profilex_buttons(fp, kobj,
+ attr, buf, off, count, 0);
+}
+
+static ssize_t pyra_sysfs_read_profile2_buttons(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return pyra_sysfs_read_profilex_buttons(fp, kobj,
+ attr, buf, off, count, 1);
+}
+
+static ssize_t pyra_sysfs_read_profile3_buttons(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return pyra_sysfs_read_profilex_buttons(fp, kobj,
+ attr, buf, off, count, 2);
+}
+
+static ssize_t pyra_sysfs_read_profile4_buttons(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return pyra_sysfs_read_profilex_buttons(fp, kobj,
+ attr, buf, off, count, 3);
+}
+
+static ssize_t pyra_sysfs_read_profile5_buttons(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return pyra_sysfs_read_profilex_buttons(fp, kobj,
+ attr, buf, off, count, 4);
+}
+
+static ssize_t pyra_sysfs_write_profile_settings(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev));
+ struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev));
+ int retval = 0;
+ int difference;
+ int profile_number;
+ struct pyra_profile_settings *profile_settings;
+
+ if (off != 0 || count != sizeof(struct pyra_profile_settings))
+ return -EINVAL;
+
+ profile_number = ((struct pyra_profile_settings const *)buf)->number;
+ profile_settings = &pyra->profile_settings[profile_number];
+
+ mutex_lock(&pyra->pyra_lock);
+ difference = memcmp(buf, profile_settings,
+ sizeof(struct pyra_profile_settings));
+ if (difference) {
+ retval = pyra_set_profile_settings(usb_dev,
+ (struct pyra_profile_settings const *)buf);
+ if (!retval)
+ memcpy(profile_settings, buf,
+ sizeof(struct pyra_profile_settings));
+ }
+ mutex_unlock(&pyra->pyra_lock);
+
+ if (retval)
+ return retval;
+
+ return sizeof(struct pyra_profile_settings);
+}
+
+static ssize_t pyra_sysfs_write_profile_buttons(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev));
+ struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev));
+ int retval = 0;
+ int difference;
+ int profile_number;
+ struct pyra_profile_buttons *profile_buttons;
+
+ if (off != 0 || count != sizeof(struct pyra_profile_buttons))
+ return -EINVAL;
+
+ profile_number = ((struct pyra_profile_buttons const *)buf)->number;
+ profile_buttons = &pyra->profile_buttons[profile_number];
+
+ mutex_lock(&pyra->pyra_lock);
+ difference = memcmp(buf, profile_buttons,
+ sizeof(struct pyra_profile_buttons));
+ if (difference) {
+ retval = pyra_set_profile_buttons(usb_dev,
+ (struct pyra_profile_buttons const *)buf);
+ if (!retval)
+ memcpy(profile_buttons, buf,
+ sizeof(struct pyra_profile_buttons));
+ }
+ mutex_unlock(&pyra->pyra_lock);
+
+ if (retval)
+ return retval;
+
+ return sizeof(struct pyra_profile_buttons);
+}
+
+static ssize_t pyra_sysfs_read_settings(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev));
+
+ if (off >= sizeof(struct pyra_settings))
+ return 0;
+
+ if (off + count > sizeof(struct pyra_settings))
+ count = sizeof(struct pyra_settings) - off;
+
+ mutex_lock(&pyra->pyra_lock);
+ memcpy(buf, ((char const *)&pyra->settings) + off, count);
+ mutex_unlock(&pyra->pyra_lock);
+
+ return count;
+}
+
+static ssize_t pyra_sysfs_write_settings(struct file *fp,
+ struct kobject *kobj, struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev));
+ struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev));
+ int retval = 0;
+ int difference;
+
+ if (off != 0 || count != sizeof(struct pyra_settings))
+ return -EINVAL;
+
+ mutex_lock(&pyra->pyra_lock);
+ difference = memcmp(buf, &pyra->settings, sizeof(struct pyra_settings));
+ if (difference) {
+ retval = pyra_set_settings(usb_dev,
+ (struct pyra_settings const *)buf);
+ if (!retval)
+ memcpy(&pyra->settings, buf,
+ sizeof(struct pyra_settings));
+ }
+ mutex_unlock(&pyra->pyra_lock);
+
+ if (retval)
+ return retval;
+
+ profile_activated(pyra, pyra->settings.startup_profile);
+
+ return sizeof(struct pyra_settings);
+}
+
+
+static ssize_t pyra_sysfs_show_actual_cpi(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev));
+ return snprintf(buf, PAGE_SIZE, "%d\n", pyra->actual_cpi);
+}
+
+static ssize_t pyra_sysfs_show_actual_profile(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev));
+ return snprintf(buf, PAGE_SIZE, "%d\n", pyra->actual_profile);
+}
+
+static ssize_t pyra_sysfs_show_firmware_version(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev));
+ return snprintf(buf, PAGE_SIZE, "%d\n", pyra->firmware_version);
+}
+
+static ssize_t pyra_sysfs_show_startup_profile(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev));
+ return snprintf(buf, PAGE_SIZE, "%d\n", pyra->settings.startup_profile);
+}
+
+static DEVICE_ATTR(actual_cpi, 0440, pyra_sysfs_show_actual_cpi, NULL);
+
+static DEVICE_ATTR(actual_profile, 0440, pyra_sysfs_show_actual_profile, NULL);
+
+static DEVICE_ATTR(firmware_version, 0440,
+ pyra_sysfs_show_firmware_version, NULL);
+
+static DEVICE_ATTR(startup_profile, 0440,
+ pyra_sysfs_show_startup_profile, NULL);
+
+static struct attribute *pyra_attributes[] = {
+ &dev_attr_actual_cpi.attr,
+ &dev_attr_actual_profile.attr,
+ &dev_attr_firmware_version.attr,
+ &dev_attr_startup_profile.attr,
+ NULL
+};
+
+static struct attribute_group pyra_attribute_group = {
+ .attrs = pyra_attributes
+};
+
+static struct bin_attribute pyra_profile_settings_attr = {
+ .attr = { .name = "profile_settings", .mode = 0220 },
+ .size = sizeof(struct pyra_profile_settings),
+ .write = pyra_sysfs_write_profile_settings
+};
+
+static struct bin_attribute pyra_profile1_settings_attr = {
+ .attr = { .name = "profile1_settings", .mode = 0440 },
+ .size = sizeof(struct pyra_profile_settings),
+ .read = pyra_sysfs_read_profile1_settings
+};
+
+static struct bin_attribute pyra_profile2_settings_attr = {
+ .attr = { .name = "profile2_settings", .mode = 0440 },
+ .size = sizeof(struct pyra_profile_settings),
+ .read = pyra_sysfs_read_profile2_settings
+};
+
+static struct bin_attribute pyra_profile3_settings_attr = {
+ .attr = { .name = "profile3_settings", .mode = 0440 },
+ .size = sizeof(struct pyra_profile_settings),
+ .read = pyra_sysfs_read_profile3_settings
+};
+
+static struct bin_attribute pyra_profile4_settings_attr = {
+ .attr = { .name = "profile4_settings", .mode = 0440 },
+ .size = sizeof(struct pyra_profile_settings),
+ .read = pyra_sysfs_read_profile4_settings
+};
+
+static struct bin_attribute pyra_profile5_settings_attr = {
+ .attr = { .name = "profile5_settings", .mode = 0440 },
+ .size = sizeof(struct pyra_profile_settings),
+ .read = pyra_sysfs_read_profile5_settings
+};
+
+static struct bin_attribute pyra_profile_buttons_attr = {
+ .attr = { .name = "profile_buttons", .mode = 0220 },
+ .size = sizeof(struct pyra_profile_buttons),
+ .write = pyra_sysfs_write_profile_buttons
+};
+
+static struct bin_attribute pyra_profile1_buttons_attr = {
+ .attr = { .name = "profile1_buttons", .mode = 0440 },
+ .size = sizeof(struct pyra_profile_buttons),
+ .read = pyra_sysfs_read_profile1_buttons
+};
+
+static struct bin_attribute pyra_profile2_buttons_attr = {
+ .attr = { .name = "profile2_buttons", .mode = 0440 },
+ .size = sizeof(struct pyra_profile_buttons),
+ .read = pyra_sysfs_read_profile2_buttons
+};
+
+static struct bin_attribute pyra_profile3_buttons_attr = {
+ .attr = { .name = "profile3_buttons", .mode = 0440 },
+ .size = sizeof(struct pyra_profile_buttons),
+ .read = pyra_sysfs_read_profile3_buttons
+};
+
+static struct bin_attribute pyra_profile4_buttons_attr = {
+ .attr = { .name = "profile4_buttons", .mode = 0440 },
+ .size = sizeof(struct pyra_profile_buttons),
+ .read = pyra_sysfs_read_profile4_buttons
+};
+
+static struct bin_attribute pyra_profile5_buttons_attr = {
+ .attr = { .name = "profile5_buttons", .mode = 0440 },
+ .size = sizeof(struct pyra_profile_buttons),
+ .read = pyra_sysfs_read_profile5_buttons
+};
+
+static struct bin_attribute pyra_settings_attr = {
+ .attr = { .name = "settings", .mode = 0660 },
+ .size = sizeof(struct pyra_settings),
+ .read = pyra_sysfs_read_settings,
+ .write = pyra_sysfs_write_settings
+};
+
+static int pyra_create_sysfs_attributes(struct usb_interface *intf)
+{
+ int retval;
+
+ retval = sysfs_create_group(&intf->dev.kobj, &pyra_attribute_group);
+ if (retval)
+ goto exit_1;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile_settings_attr);
+ if (retval)
+ goto exit_2;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile1_settings_attr);
+ if (retval)
+ goto exit_3;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile2_settings_attr);
+ if (retval)
+ goto exit_4;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile3_settings_attr);
+ if (retval)
+ goto exit_5;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile4_settings_attr);
+ if (retval)
+ goto exit_6;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile5_settings_attr);
+ if (retval)
+ goto exit_7;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile_buttons_attr);
+ if (retval)
+ goto exit_8;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile1_buttons_attr);
+ if (retval)
+ goto exit_9;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile2_buttons_attr);
+ if (retval)
+ goto exit_10;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile3_buttons_attr);
+ if (retval)
+ goto exit_11;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile4_buttons_attr);
+ if (retval)
+ goto exit_12;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_profile5_buttons_attr);
+ if (retval)
+ goto exit_13;
+
+ retval = sysfs_create_bin_file(&intf->dev.kobj,
+ &pyra_settings_attr);
+ if (retval)
+ goto exit_14;
+
+ return 0;
+
+exit_14:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile5_buttons_attr);
+exit_13:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile4_buttons_attr);
+exit_12:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile3_buttons_attr);
+exit_11:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile2_buttons_attr);
+exit_10:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile1_buttons_attr);
+exit_9:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile_buttons_attr);
+exit_8:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile5_settings_attr);
+exit_7:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile4_settings_attr);
+exit_6:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile3_settings_attr);
+exit_5:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile2_settings_attr);
+exit_4:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile1_settings_attr);
+exit_3:
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile_settings_attr);
+exit_2:
+ sysfs_remove_group(&intf->dev.kobj, &pyra_attribute_group);
+exit_1:
+ return retval;
+}
+
+static void pyra_remove_sysfs_attributes(struct usb_interface *intf)
+{
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_settings_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile5_buttons_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile4_buttons_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile3_buttons_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile2_buttons_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile1_buttons_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile_buttons_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile5_settings_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile4_settings_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile3_settings_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile2_settings_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile1_settings_attr);
+ sysfs_remove_bin_file(&intf->dev.kobj, &pyra_profile_settings_attr);
+ sysfs_remove_group(&intf->dev.kobj, &pyra_attribute_group);
+}
+
+static int pyra_init_pyra_device_struct(struct usb_device *usb_dev,
+ struct pyra_device *pyra)
+{
+ struct pyra_info *info;
+ int retval, i;
+
+ mutex_init(&pyra->pyra_lock);
+
+ info = kmalloc(sizeof(struct pyra_info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ retval = pyra_get_info(usb_dev, info);
+ if (retval) {
+ kfree(info);
+ return retval;
+ }
+ pyra->firmware_version = info->firmware_version;
+ kfree(info);
+
+ retval = pyra_get_settings(usb_dev, &pyra->settings);
+ if (retval)
+ return retval;
+
+ for (i = 0; i < 5; ++i) {
+ retval = pyra_get_profile_settings(usb_dev,
+ &pyra->profile_settings[i], i);
+ if (retval)
+ return retval;
+
+ retval = pyra_get_profile_buttons(usb_dev,
+ &pyra->profile_buttons[i], i);
+ if (retval)
+ return retval;
+ }
+
+ profile_activated(pyra, pyra->settings.startup_profile);
+
+ return 0;
+}
+
+static int pyra_init_specials(struct hid_device *hdev)
+{
+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+ struct usb_device *usb_dev = interface_to_usbdev(intf);
+ struct pyra_device *pyra;
+ int retval;
+
+ if (intf->cur_altsetting->desc.bInterfaceProtocol
+ == USB_INTERFACE_PROTOCOL_MOUSE) {
+
+ pyra = kzalloc(sizeof(*pyra), GFP_KERNEL);
+ if (!pyra) {
+ dev_err(&hdev->dev, "can't alloc device descriptor\n");
+ return -ENOMEM;
+ }
+ hid_set_drvdata(hdev, pyra);
+
+ retval = pyra_init_pyra_device_struct(usb_dev, pyra);
+ if (retval) {
+ dev_err(&hdev->dev,
+ "couldn't init struct pyra_device\n");
+ goto exit_free;
+ }
+
+ retval = roccat_connect(hdev);
+ if (retval < 0) {
+ dev_err(&hdev->dev, "couldn't init char dev\n");
+ } else {
+ pyra->chrdev_minor = retval;
+ pyra->roccat_claimed = 1;
+ }
+
+ retval = pyra_create_sysfs_attributes(intf);
+ if (retval) {
+ dev_err(&hdev->dev, "cannot create sysfs files\n");
+ goto exit_free;
+ }
+ } else {
+ hid_set_drvdata(hdev, NULL);
+ }
+
+ return 0;
+exit_free:
+ kfree(pyra);
+ return retval;
+}
+
+static void pyra_remove_specials(struct hid_device *hdev)
+{
+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+ struct pyra_device *pyra;
+
+ if (intf->cur_altsetting->desc.bInterfaceProtocol
+ == USB_INTERFACE_PROTOCOL_MOUSE) {
+ pyra_remove_sysfs_attributes(intf);
+ pyra = hid_get_drvdata(hdev);
+ if (pyra->roccat_claimed)
+ roccat_disconnect(pyra->chrdev_minor);
+ kfree(hid_get_drvdata(hdev));
+ }
+}
+
+static int pyra_probe(struct hid_device *hdev, const struct hid_device_id *id)
+{
+ int retval;
+
+ retval = hid_parse(hdev);
+ if (retval) {
+ dev_err(&hdev->dev, "parse failed\n");
+ goto exit;
+ }
+
+ retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+ if (retval) {
+ dev_err(&hdev->dev, "hw start failed\n");
+ goto exit;
+ }
+
+ retval = pyra_init_specials(hdev);
+ if (retval) {
+ dev_err(&hdev->dev, "couldn't install mouse\n");
+ goto exit_stop;
+ }
+ return 0;
+
+exit_stop:
+ hid_hw_stop(hdev);
+exit:
+ return retval;
+}
+
+static void pyra_remove(struct hid_device *hdev)
+{
+ pyra_remove_specials(hdev);
+ hid_hw_stop(hdev);
+}
+
+static void pyra_keep_values_up_to_date(struct pyra_device *pyra,
+ u8 const *data)
+{
+ struct pyra_mouse_event_button const *button_event;
+
+ switch (data[0]) {
+ case PYRA_MOUSE_REPORT_NUMBER_BUTTON:
+ button_event = (struct pyra_mouse_event_button const *)data;
+ switch (button_event->type) {
+ case PYRA_MOUSE_EVENT_BUTTON_TYPE_PROFILE_2:
+ profile_activated(pyra, button_event->data1 - 1);
+ break;
+ case PYRA_MOUSE_EVENT_BUTTON_TYPE_CPI:
+ pyra->actual_cpi = button_event->data1;
+ break;
+ }
+ break;
+ }
+}
+
+static void pyra_report_to_chrdev(struct pyra_device const *pyra,
+ u8 const *data)
+{
+ struct pyra_roccat_report roccat_report;
+ struct pyra_mouse_event_button const *button_event;
+
+ if (data[0] != PYRA_MOUSE_REPORT_NUMBER_BUTTON)
+ return;
+
+ button_event = (struct pyra_mouse_event_button const *)data;
+
+ switch (button_event->type) {
+ case PYRA_MOUSE_EVENT_BUTTON_TYPE_PROFILE_2:
+ case PYRA_MOUSE_EVENT_BUTTON_TYPE_CPI:
+ roccat_report.type = button_event->type;
+ roccat_report.value = button_event->data1;
+ roccat_report.key = 0;
+ roccat_report_event(pyra->chrdev_minor,
+ (uint8_t const *)&roccat_report,
+ sizeof(struct pyra_roccat_report));
+ break;
+ case PYRA_MOUSE_EVENT_BUTTON_TYPE_MACRO:
+ case PYRA_MOUSE_EVENT_BUTTON_TYPE_SHORTCUT:
+ case PYRA_MOUSE_EVENT_BUTTON_TYPE_QUICKLAUNCH:
+ if (button_event->data2 == PYRA_MOUSE_EVENT_BUTTON_PRESS) {
+ roccat_report.type = button_event->type;
+ roccat_report.key = button_event->data1;
+ /*
+ * pyra reports profile numbers with range 1-5.
+ * Keeping this behaviour.
+ */
+ roccat_report.value = pyra->actual_profile + 1;
+ roccat_report_event(pyra->chrdev_minor,
+ (uint8_t const *)&roccat_report,
+ sizeof(struct pyra_roccat_report));
+ }
+ break;
+ }
+}
+
+static int pyra_raw_event(struct hid_device *hdev, struct hid_report *report,
+ u8 *data, int size)
+{
+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+ struct pyra_device *pyra = hid_get_drvdata(hdev);
+
+ if (intf->cur_altsetting->desc.bInterfaceProtocol
+ != USB_INTERFACE_PROTOCOL_MOUSE)
+ return 0;
+
+ pyra_keep_values_up_to_date(pyra, data);
+
+ if (pyra->roccat_claimed)
+ pyra_report_to_chrdev(pyra, data);
+
+ return 0;
+}
+
+static const struct hid_device_id pyra_devices[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT,
+ USB_DEVICE_ID_ROCCAT_PYRA_WIRED) },
+ /* TODO add USB_DEVICE_ID_ROCCAT_PYRA_WIRELESS after testing */
+ { }
+};
+
+MODULE_DEVICE_TABLE(hid, pyra_devices);
+
+static struct hid_driver pyra_driver = {
+ .name = "pyra",
+ .id_table = pyra_devices,
+ .probe = pyra_probe,
+ .remove = pyra_remove,
+ .raw_event = pyra_raw_event
+};
+
+static int __init pyra_init(void)
+{
+ return hid_register_driver(&pyra_driver);
+}
+
+static void __exit pyra_exit(void)
+{
+ hid_unregister_driver(&pyra_driver);
+}
+
+module_init(pyra_init);
+module_exit(pyra_exit);
+
+MODULE_AUTHOR("Stefan Achatz");
+MODULE_DESCRIPTION("USB Roccat Pyra driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hid/hid-roccat-pyra.h b/drivers/hid/hid-roccat-pyra.h
new file mode 100644
index 0000000..22f80a8
--- /dev/null
+++ b/drivers/hid/hid-roccat-pyra.h
@@ -0,0 +1,186 @@
+#ifndef __HID_ROCCAT_PYRA_H
+#define __HID_ROCCAT_PYRA_H
+
+/*
+ * Copyright (c) 2010 Stefan Achatz <erazor_de@users.sourceforge.net>
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/types.h>
+
+#pragma pack(push)
+#pragma pack(1)
+
+struct pyra_b {
+ uint8_t command; /* PYRA_COMMAND_B */
+ uint8_t size; /* always 3 */
+ uint8_t unknown; /* 1 */
+};
+
+struct pyra_control {
+ uint8_t command; /* PYRA_COMMAND_CONTROL */
+ /*
+ * value is profile number for request_settings and request_buttons
+ * 1 if status ok for request_status
+ */
+ uint8_t value; /* Range 0-4 */
+ uint8_t request;
+};
+
+enum pyra_control_requests {
+ PYRA_CONTROL_REQUEST_STATUS = 0x00,
+ PYRA_CONTROL_REQUEST_PROFILE_SETTINGS = 0x10,
+ PYRA_CONTROL_REQUEST_PROFILE_BUTTONS = 0x20
+};
+
+struct pyra_settings {
+ uint8_t command; /* PYRA_COMMAND_SETTINGS */
+ uint8_t size; /* always 3 */
+ uint8_t startup_profile; /* Range 0-4! */
+};
+
+struct pyra_profile_settings {
+ uint8_t command; /* PYRA_COMMAND_PROFILE_SETTINGS */
+ uint8_t size; /* always 0xd */
+ uint8_t number; /* Range 0-4 */
+ uint8_t xysync;
+ uint8_t x_sensitivity; /* 0x1-0xa */
+ uint8_t y_sensitivity;
+ uint8_t x_cpi; /* unused */
+ uint8_t y_cpi; /* this value is for x and y */
+ uint8_t lightswitch; /* 0 = off, 1 = on */
+ uint8_t light_effect;
+ uint8_t handedness;
+ uint16_t checksum; /* byte sum */
+};
+
+struct pyra_profile_buttons {
+ uint8_t command; /* PYRA_COMMAND_PROFILE_BUTTONS */
+ uint8_t size; /* always 0x13 */
+ uint8_t number; /* Range 0-4 */
+ uint8_t buttons[14];
+ uint16_t checksum; /* byte sum */
+};
+
+struct pyra_info {
+ uint8_t command; /* PYRA_COMMAND_INFO */
+ uint8_t size; /* always 6 */
+ uint8_t firmware_version;
+ uint8_t unknown1; /* always 0 */
+ uint8_t unknown2; /* always 1 */
+ uint8_t unknown3; /* always 0 */
+};
+
+enum pyra_commands {
+ PYRA_COMMAND_CONTROL = 0x4,
+ PYRA_COMMAND_SETTINGS = 0x5,
+ PYRA_COMMAND_PROFILE_SETTINGS = 0x6,
+ PYRA_COMMAND_PROFILE_BUTTONS = 0x7,
+ PYRA_COMMAND_INFO = 0x9,
+ PYRA_COMMAND_B = 0xb
+};
+
+enum pyra_usb_commands {
+ PYRA_USB_COMMAND_CONTROL = 0x304,
+ PYRA_USB_COMMAND_SETTINGS = 0x305,
+ PYRA_USB_COMMAND_PROFILE_SETTINGS = 0x306,
+ PYRA_USB_COMMAND_PROFILE_BUTTONS = 0x307,
+ PYRA_USB_COMMAND_INFO = 0x309,
+ PYRA_USB_COMMAND_B = 0x30b /* writes 3 bytes */
+};
+
+enum pyra_mouse_report_numbers {
+ PYRA_MOUSE_REPORT_NUMBER_HID = 1,
+ PYRA_MOUSE_REPORT_NUMBER_AUDIO = 2,
+ PYRA_MOUSE_REPORT_NUMBER_BUTTON = 3,
+};
+
+struct pyra_mouse_event_button {
+ uint8_t report_number; /* always 3 */
+ uint8_t unknown; /* always 0 */
+ uint8_t type;
+ uint8_t data1;
+ uint8_t data2;
+};
+
+struct pyra_mouse_event_audio {
+ uint8_t report_number; /* always 2 */
+ uint8_t type;
+ uint8_t unused; /* always 0 */
+};
+
+/* hid audio controls */
+enum pyra_mouse_event_audio_types {
+ PYRA_MOUSE_EVENT_AUDIO_TYPE_MUTE = 0xe2,
+ PYRA_MOUSE_EVENT_AUDIO_TYPE_VOLUME_UP = 0xe9,
+ PYRA_MOUSE_EVENT_AUDIO_TYPE_VOLUME_DOWN = 0xea,
+};
+
+enum pyra_mouse_event_button_types {
+ /*
+ * Mouse sends tilt events on report_number 1 and 3
+ * Tilt events are sent repeatedly with 0.94s between first and second
+ * event and 0.22s on subsequent
+ */
+ PYRA_MOUSE_EVENT_BUTTON_TYPE_TILT = 0x10,
+
+ /*
+ * These are sent sequentially
+ * data1 contains new profile number in range 1-5
+ */
+ PYRA_MOUSE_EVENT_BUTTON_TYPE_PROFILE_1 = 0x20,
+ PYRA_MOUSE_EVENT_BUTTON_TYPE_PROFILE_2 = 0x30,
+
+ /*
+ * data1 = button_number (rmp index)
+ * data2 = pressed/released
+ */
+ PYRA_MOUSE_EVENT_BUTTON_TYPE_MACRO = 0x40,
+ PYRA_MOUSE_EVENT_BUTTON_TYPE_SHORTCUT = 0x50,
+
+ /*
+ * data1 = button_number (rmp index)
+ */
+ PYRA_MOUSE_EVENT_BUTTON_TYPE_QUICKLAUNCH = 0x60,
+
+ /* data1 = new cpi */
+ PYRA_MOUSE_EVENT_BUTTON_TYPE_CPI = 0xb0,
+
+ /* data1 and data2 = new sensitivity */
+ PYRA_MOUSE_EVENT_BUTTON_TYPE_SENSITIVITY = 0xc0,
+
+ PYRA_MOUSE_EVENT_BUTTON_TYPE_MULTIMEDIA = 0xf0,
+};
+
+enum {
+ PYRA_MOUSE_EVENT_BUTTON_PRESS = 0,
+ PYRA_MOUSE_EVENT_BUTTON_RELEASE = 1,
+};
+
+struct pyra_roccat_report {
+ uint8_t type;
+ uint8_t value;
+ uint8_t key;
+};
+
+#pragma pack(pop)
+
+struct pyra_device {
+ int actual_profile;
+ int actual_cpi;
+ int firmware_version;
+ int roccat_claimed;
+ int chrdev_minor;
+ struct mutex pyra_lock;
+ struct pyra_settings settings;
+ struct pyra_profile_settings profile_settings[5];
+ struct pyra_profile_buttons profile_buttons[5];
+};
+
+#endif
diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c
index bda0fd6..3589444 100644
--- a/drivers/hid/hid-samsung.c
+++ b/drivers/hid/hid-samsung.c
@@ -61,10 +61,10 @@
"descriptor\n", rsize);
}
-static void samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
- if (rsize == 184 && rdesc[175] == 0x25 && rdesc[176] == 0x40 &&
+ if (*rsize == 184 && rdesc[175] == 0x25 && rdesc[176] == 0x40 &&
rdesc[177] == 0x75 && rdesc[178] == 0x30 &&
rdesc[179] == 0x95 && rdesc[180] == 0x01 &&
rdesc[182] == 0x40) {
@@ -74,24 +74,25 @@
rdesc[180] = 0x06;
rdesc[182] = 0x42;
} else
- if (rsize == 203 && rdesc[192] == 0x15 && rdesc[193] == 0x0 &&
+ if (*rsize == 203 && rdesc[192] == 0x15 && rdesc[193] == 0x0 &&
rdesc[194] == 0x25 && rdesc[195] == 0x12) {
samsung_irda_dev_trace(hdev, 203);
rdesc[193] = 0x1;
rdesc[195] = 0xf;
} else
- if (rsize == 135 && rdesc[124] == 0x15 && rdesc[125] == 0x0 &&
+ if (*rsize == 135 && rdesc[124] == 0x15 && rdesc[125] == 0x0 &&
rdesc[126] == 0x25 && rdesc[127] == 0x11) {
samsung_irda_dev_trace(hdev, 135);
rdesc[125] = 0x1;
rdesc[127] = 0xe;
} else
- if (rsize == 171 && rdesc[160] == 0x15 && rdesc[161] == 0x0 &&
+ if (*rsize == 171 && rdesc[160] == 0x15 && rdesc[161] == 0x0 &&
rdesc[162] == 0x25 && rdesc[163] == 0x01) {
samsung_irda_dev_trace(hdev, 171);
rdesc[161] = 0x1;
rdesc[163] = 0x3;
}
+ return rdesc;
}
#define samsung_kbd_mouse_map_key_clear(c) \
@@ -130,11 +131,12 @@
return 1;
}
-static void samsung_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *samsung_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
if (USB_DEVICE_ID_SAMSUNG_IR_REMOTE == hdev->product)
- samsung_irda_report_fixup(hdev, rdesc, rsize);
+ rdesc = samsung_irda_report_fixup(hdev, rdesc, rsize);
+ return rdesc;
}
static int samsung_input_mapping(struct hid_device *hdev, struct hid_input *hi,
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 402d557..677bb3d 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -24,24 +24,46 @@
#include "hid-ids.h"
-#define VAIO_RDESC_CONSTANT 0x0001
+#define VAIO_RDESC_CONSTANT (1 << 0)
+#define SIXAXIS_CONTROLLER_USB (1 << 1)
+#define SIXAXIS_CONTROLLER_BT (1 << 2)
struct sony_sc {
unsigned long quirks;
};
/* Sony Vaio VGX has wrongly mouse pointer declared as constant */
-static void sony_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *sony_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
struct sony_sc *sc = hid_get_drvdata(hdev);
if ((sc->quirks & VAIO_RDESC_CONSTANT) &&
- rsize >= 56 && rdesc[54] == 0x81 && rdesc[55] == 0x07) {
+ *rsize >= 56 && rdesc[54] == 0x81 && rdesc[55] == 0x07) {
dev_info(&hdev->dev, "Fixing up Sony Vaio VGX report "
"descriptor\n");
rdesc[55] = 0x06;
}
+ return rdesc;
+}
+
+static int sixaxis_usb_output_raw_report(struct hid_device *hid, __u8 *buf,
+ size_t count, unsigned char report_type)
+{
+ struct usb_interface *intf = to_usb_interface(hid->dev.parent);
+ struct usb_device *dev = interface_to_usbdev(intf);
+ struct usb_host_interface *interface = intf->cur_altsetting;
+ int report_id = buf[0];
+ int ret;
+
+ ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
+ HID_REQ_SET_REPORT,
+ USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+ ((report_type + 1) << 8) | report_id,
+ interface->desc.bInterfaceNumber, buf, count,
+ USB_CTRL_SET_TIMEOUT);
+
+ return ret;
}
/*
@@ -49,7 +71,7 @@
* to "operational". Without this, the ps3 controller will not report any
* events.
*/
-static int sony_set_operational_usb(struct hid_device *hdev)
+static int sixaxis_set_operational_usb(struct hid_device *hdev)
{
struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
struct usb_device *dev = interface_to_usbdev(intf);
@@ -74,7 +96,7 @@
return ret;
}
-static int sony_set_operational_bt(struct hid_device *hdev)
+static int sixaxis_set_operational_bt(struct hid_device *hdev)
{
unsigned char buf[] = { 0xf4, 0x42, 0x03, 0x00, 0x00 };
return hdev->hid_output_raw_report(hdev, buf, sizeof(buf), HID_FEATURE_REPORT);
@@ -108,16 +130,14 @@
goto err_free;
}
- switch (hdev->bus) {
- case BUS_USB:
- ret = sony_set_operational_usb(hdev);
- break;
- case BUS_BLUETOOTH:
- ret = sony_set_operational_bt(hdev);
- break;
- default:
- ret = 0;
+ if (sc->quirks & SIXAXIS_CONTROLLER_USB) {
+ hdev->hid_output_raw_report = sixaxis_usb_output_raw_report;
+ ret = sixaxis_set_operational_usb(hdev);
}
+ else if (sc->quirks & SIXAXIS_CONTROLLER_BT)
+ ret = sixaxis_set_operational_bt(hdev);
+ else
+ ret = 0;
if (ret < 0)
goto err_stop;
@@ -137,8 +157,10 @@
}
static const struct hid_device_id sony_devices[] = {
- { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER),
+ .driver_data = SIXAXIS_CONTROLLER_USB },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER),
+ .driver_data = SIXAXIS_CONTROLLER_BT },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE),
.driver_data = VAIO_RDESC_CONSTANT },
{ }
diff --git a/drivers/hid/hid-stantum.c b/drivers/hid/hid-stantum.c
index 90df886..3171be2 100644
--- a/drivers/hid/hid-stantum.c
+++ b/drivers/hid/hid-stantum.c
@@ -249,6 +249,8 @@
static const struct hid_device_id stantum_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_STANTUM, USB_DEVICE_ID_MTP) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_STANTUM_STM, USB_DEVICE_ID_MTP_STM) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_STANTUM_SITRONIX, USB_DEVICE_ID_MTP_SITRONIX) },
{ }
};
MODULE_DEVICE_TABLE(hid, stantum_devices);
diff --git a/drivers/hid/hid-sunplus.c b/drivers/hid/hid-sunplus.c
index 438107d..164ed56 100644
--- a/drivers/hid/hid-sunplus.c
+++ b/drivers/hid/hid-sunplus.c
@@ -22,16 +22,17 @@
#include "hid-ids.h"
-static void sp_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *sp_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
- if (rsize >= 107 && rdesc[104] == 0x26 && rdesc[105] == 0x80 &&
+ if (*rsize >= 107 && rdesc[104] == 0x26 && rdesc[105] == 0x80 &&
rdesc[106] == 0x03) {
dev_info(&hdev->dev, "fixing up Sunplus Wireless Desktop "
"report descriptor\n");
rdesc[105] = rdesc[110] = 0x03;
rdesc[106] = rdesc[111] = 0x21;
}
+ return rdesc;
}
#define sp_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \
diff --git a/drivers/hid/hid-uclogic.c b/drivers/hid/hid-uclogic.c
new file mode 100644
index 0000000..05fdc85a
--- /dev/null
+++ b/drivers/hid/hid-uclogic.c
@@ -0,0 +1,623 @@
+/*
+ * HID driver for UC-Logic devices not fully compliant with HID standard
+ *
+ * Copyright (c) 2010 Nikolai Kondrashov
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/hid.h>
+#include <linux/module.h>
+
+#include "hid-ids.h"
+
+/*
+ * The original descriptors of WPXXXXU tablets have three report IDs, of
+ * which only two are used (8 and 9), and the remaining (7) seems to have
+ * the originally intended pen description which was abandoned for some
+ * reason. From this unused description it is possible to extract the
+ * actual physical extents and resolution. All the models use the same
+ * descriptor with different extents for the unused report ID.
+ *
+ * Here it is:
+ *
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Pen), ; Pen (02h, application collection)
+ * Collection (Application),
+ * Report ID (7),
+ * Usage (Stylus), ; Stylus (20h, logical collection)
+ * Collection (Physical),
+ * Usage (Tip Switch), ; Tip switch (42h, momentary control)
+ * Usage (Barrel Switch), ; Barrel switch (44h, momentary control)
+ * Usage (Eraser), ; Eraser (45h, momentary control)
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (3),
+ * Input (Variable),
+ * Report Count (3),
+ * Input (Constant, Variable),
+ * Usage (In Range), ; In range (32h, momentary control)
+ * Report Count (1),
+ * Input (Variable),
+ * Report Count (1),
+ * Input (Constant, Variable),
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (X), ; X (30h, dynamic value)
+ * Report Size (16),
+ * Report Count (1),
+ * Push,
+ * Unit Exponent (13),
+ * Unit (Inch^3),
+ * Physical Minimum (0),
+ * Physical Maximum (Xpm),
+ * Logical Maximum (Xlm),
+ * Input (Variable),
+ * Usage (Y), ; Y (31h, dynamic value)
+ * Physical Maximum (Ypm),
+ * Logical Maximum (Ylm),
+ * Input (Variable),
+ * Pop,
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Tip Pressure), ; Tip pressure (30h, dynamic value)
+ * Logical Maximum (1023),
+ * Input (Variable),
+ * Report Size (16),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (Mouse), ; Mouse (02h, application collection)
+ * Collection (Application),
+ * Report ID (8),
+ * Usage (Pointer), ; Pointer (01h, physical collection)
+ * Collection (Physical),
+ * Usage Page (Button), ; Button (09h)
+ * Usage Minimum (01h),
+ * Usage Maximum (03h),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Count (3),
+ * Report Size (1),
+ * Input (Variable),
+ * Report Count (5),
+ * Input (Constant),
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (X), ; X (30h, dynamic value)
+ * Usage (Y), ; Y (31h, dynamic value)
+ * Usage (Wheel), ; Wheel (38h, dynamic value)
+ * Usage (00h),
+ * Logical Minimum (-127),
+ * Logical Maximum (127),
+ * Report Size (8),
+ * Report Count (4),
+ * Input (Variable, Relative),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (Mouse), ; Mouse (02h, application collection)
+ * Collection (Application),
+ * Report ID (9),
+ * Usage (Pointer), ; Pointer (01h, physical collection)
+ * Collection (Physical),
+ * Usage Page (Button), ; Button (09h)
+ * Usage Minimum (01h),
+ * Usage Maximum (03h),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Count (3),
+ * Report Size (1),
+ * Input (Variable),
+ * Report Count (5),
+ * Input (Constant),
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (X), ; X (30h, dynamic value)
+ * Usage (Y), ; Y (31h, dynamic value)
+ * Logical Minimum (0),
+ * Logical Maximum (32767),
+ * Physical Minimum (0),
+ * Physical Maximum (32767),
+ * Report Count (2),
+ * Report Size (16),
+ * Input (Variable),
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Tip Pressure), ; Tip pressure (30h, dynamic value)
+ * Logical Maximum (1023),
+ * Report Count (1),
+ * Report Size (16),
+ * Input (Variable),
+ * End Collection,
+ * End Collection
+ *
+ * Here are the extents values for the WPXXXXU models:
+ *
+ * Xpm Xlm Ypm Ylm
+ * WP4030U 4000 8000 3000 6000
+ * WP5540U 5500 11000 4000 8000
+ * WP8060U 8000 16000 6000 12000
+ *
+ * This suggests that all of them have 2000 LPI resolution, as advertised.
+ */
+
+/* Size of the original descriptor of WPXXXXU tablets */
+#define WPXXXXU_RDESC_ORIG_SIZE 212
+
+/*
+ * Fixed WP4030U report descriptor.
+ * Although the hardware might actually support it, the mouse description
+ * has been removed, since there seems to be no devices having one and it
+ * wouldn't make much sense because of the working area size.
+ */
+static __u8 wp4030u_rdesc_fixed[] = {
+ 0x05, 0x0D, /* Usage Page (Digitizer), */
+ 0x09, 0x02, /* Usage (Pen), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x09, /* Report ID (9), */
+ 0x09, 0x20, /* Usage (Stylus), */
+ 0xA0, /* Collection (Physical), */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x09, 0x42, /* Usage (Tip Switch), */
+ 0x09, 0x44, /* Usage (Barrel Switch), */
+ 0x09, 0x46, /* Usage (Tablet Pick), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x95, 0x05, /* Report Count (5), */
+ 0x81, 0x01, /* Input (Constant), */
+ 0x75, 0x10, /* Report Size (16), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x14, /* Logical Minimum (0), */
+ 0xA4, /* Push, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x55, 0xFD, /* Unit Exponent (-3), */
+ 0x65, 0x13, /* Unit (Inch), */
+ 0x34, /* Physical Minimum (0), */
+ 0x09, 0x30, /* Usage (X), */
+ 0x46, 0xA0, 0x0F, /* Physical Maximum (4000), */
+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x09, 0x31, /* Usage (Y), */
+ 0x46, 0xB8, 0x0B, /* Physical Maximum (3000), */
+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xB4, /* Pop, */
+ 0x09, 0x30, /* Usage (Tip Pressure), */
+ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xC0, /* End Collection, */
+ 0xC0 /* End Collection */
+};
+
+/* Fixed WP5540U report descriptor */
+static __u8 wp5540u_rdesc_fixed[] = {
+ 0x05, 0x0D, /* Usage Page (Digitizer), */
+ 0x09, 0x02, /* Usage (Pen), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x09, /* Report ID (9), */
+ 0x09, 0x20, /* Usage (Stylus), */
+ 0xA0, /* Collection (Physical), */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x09, 0x42, /* Usage (Tip Switch), */
+ 0x09, 0x44, /* Usage (Barrel Switch), */
+ 0x09, 0x46, /* Usage (Tablet Pick), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x95, 0x05, /* Report Count (5), */
+ 0x81, 0x01, /* Input (Constant), */
+ 0x75, 0x10, /* Report Size (16), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x14, /* Logical Minimum (0), */
+ 0xA4, /* Push, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x55, 0xFD, /* Unit Exponent (-3), */
+ 0x65, 0x13, /* Unit (Inch), */
+ 0x34, /* Physical Minimum (0), */
+ 0x09, 0x30, /* Usage (X), */
+ 0x46, 0x7C, 0x15, /* Physical Maximum (5500), */
+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x09, 0x31, /* Usage (Y), */
+ 0x46, 0xA0, 0x0F, /* Physical Maximum (4000), */
+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xB4, /* Pop, */
+ 0x09, 0x30, /* Usage (Tip Pressure), */
+ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xC0, /* End Collection, */
+ 0xC0, /* End Collection, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x09, 0x02, /* Usage (Mouse), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x08, /* Report ID (8), */
+ 0x09, 0x01, /* Usage (Pointer), */
+ 0xA0, /* Collection (Physical), */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x05, 0x09, /* Usage Page (Button), */
+ 0x19, 0x01, /* Usage Minimum (01h), */
+ 0x29, 0x03, /* Usage Maximum (03h), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x95, 0x05, /* Report Count (5), */
+ 0x81, 0x01, /* Input (Constant), */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x75, 0x08, /* Report Size (8), */
+ 0x09, 0x30, /* Usage (X), */
+ 0x09, 0x31, /* Usage (Y), */
+ 0x15, 0x81, /* Logical Minimum (-127), */
+ 0x25, 0x7F, /* Logical Maximum (127), */
+ 0x95, 0x02, /* Report Count (2), */
+ 0x81, 0x06, /* Input (Variable, Relative), */
+ 0x09, 0x38, /* Usage (Wheel), */
+ 0x15, 0xFF, /* Logical Minimum (-1), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x81, 0x06, /* Input (Variable, Relative), */
+ 0x81, 0x01, /* Input (Constant), */
+ 0xC0, /* End Collection, */
+ 0xC0 /* End Collection */
+};
+
+/* Fixed WP8060U report descriptor */
+static __u8 wp8060u_rdesc_fixed[] = {
+ 0x05, 0x0D, /* Usage Page (Digitizer), */
+ 0x09, 0x02, /* Usage (Pen), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x09, /* Report ID (9), */
+ 0x09, 0x20, /* Usage (Stylus), */
+ 0xA0, /* Collection (Physical), */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x09, 0x42, /* Usage (Tip Switch), */
+ 0x09, 0x44, /* Usage (Barrel Switch), */
+ 0x09, 0x46, /* Usage (Tablet Pick), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x95, 0x05, /* Report Count (5), */
+ 0x81, 0x01, /* Input (Constant), */
+ 0x75, 0x10, /* Report Size (16), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x14, /* Logical Minimum (0), */
+ 0xA4, /* Push, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x55, 0xFD, /* Unit Exponent (-3), */
+ 0x65, 0x13, /* Unit (Inch), */
+ 0x34, /* Physical Minimum (0), */
+ 0x09, 0x30, /* Usage (X), */
+ 0x46, 0x40, 0x1F, /* Physical Maximum (8000), */
+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x09, 0x31, /* Usage (Y), */
+ 0x46, 0x70, 0x17, /* Physical Maximum (6000), */
+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xB4, /* Pop, */
+ 0x09, 0x30, /* Usage (Tip Pressure), */
+ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xC0, /* End Collection, */
+ 0xC0, /* End Collection, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x09, 0x02, /* Usage (Mouse), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x08, /* Report ID (8), */
+ 0x09, 0x01, /* Usage (Pointer), */
+ 0xA0, /* Collection (Physical), */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x05, 0x09, /* Usage Page (Button), */
+ 0x19, 0x01, /* Usage Minimum (01h), */
+ 0x29, 0x03, /* Usage Maximum (03h), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x95, 0x05, /* Report Count (5), */
+ 0x81, 0x01, /* Input (Constant), */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x75, 0x08, /* Report Size (8), */
+ 0x09, 0x30, /* Usage (X), */
+ 0x09, 0x31, /* Usage (Y), */
+ 0x15, 0x81, /* Logical Minimum (-127), */
+ 0x25, 0x7F, /* Logical Maximum (127), */
+ 0x95, 0x02, /* Report Count (2), */
+ 0x81, 0x06, /* Input (Variable, Relative), */
+ 0x09, 0x38, /* Usage (Wheel), */
+ 0x15, 0xFF, /* Logical Minimum (-1), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x81, 0x06, /* Input (Variable, Relative), */
+ 0x81, 0x01, /* Input (Constant), */
+ 0xC0, /* End Collection, */
+ 0xC0 /* End Collection */
+};
+
+/*
+ * Original PF1209 report descriptor.
+ *
+ * The descriptor is similar to WPXXXXU descriptors, with an addition of a
+ * feature report (ID 4) of unknown purpose.
+ *
+ * Although the advertised resolution is 4000 LPI the unused report ID
+ * (taken from WPXXXXU, it seems) states 2000 LPI, but it is probably
+ * incorrect and is a result of blind copying without understanding. Anyway
+ * the real logical extents are always scaled to 0..32767, which IMHO spoils
+ * the precision.
+ *
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Pen), ; Pen (02h, application collection)
+ * Collection (Application),
+ * Report ID (7),
+ * Usage (Stylus), ; Stylus (20h, logical collection)
+ * Collection (Physical),
+ * Usage (Tip Switch), ; Tip switch (42h, momentary control)
+ * Usage (Barrel Switch), ; Barrel switch (44h, momentary control)
+ * Usage (Eraser), ; Eraser (45h, momentary control)
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (3),
+ * Input (Variable),
+ * Report Count (3),
+ * Input (Constant, Variable),
+ * Usage (In Range), ; In range (32h, momentary control)
+ * Report Count (1),
+ * Input (Variable),
+ * Report Count (1),
+ * Input (Constant, Variable),
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (X), ; X (30h, dynamic value)
+ * Report Size (16),
+ * Report Count (1),
+ * Push,
+ * Unit Exponent (13),
+ * Unit (Inch^3),
+ * Physical Minimum (0),
+ * Physical Maximum (12000),
+ * Logical Maximum (24000),
+ * Input (Variable),
+ * Usage (Y), ; Y (31h, dynamic value)
+ * Physical Maximum (9000),
+ * Logical Maximum (18000),
+ * Input (Variable),
+ * Pop,
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Tip Pressure), ; Tip pressure (30h, dynamic value)
+ * Logical Maximum (1023),
+ * Input (Variable),
+ * Report Size (16),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (Mouse), ; Mouse (02h, application collection)
+ * Collection (Application),
+ * Report ID (8),
+ * Usage (Pointer), ; Pointer (01h, physical collection)
+ * Collection (Physical),
+ * Usage Page (Button), ; Button (09h)
+ * Usage Minimum (01h),
+ * Usage Maximum (03h),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Count (3),
+ * Report Size (1),
+ * Input (Variable),
+ * Report Count (5),
+ * Input (Constant),
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (X), ; X (30h, dynamic value)
+ * Usage (Y), ; Y (31h, dynamic value)
+ * Usage (Wheel), ; Wheel (38h, dynamic value)
+ * Usage (00h),
+ * Logical Minimum (-127),
+ * Logical Maximum (127),
+ * Report Size (8),
+ * Report Count (4),
+ * Input (Variable, Relative),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (Mouse), ; Mouse (02h, application collection)
+ * Collection (Application),
+ * Report ID (9),
+ * Usage (Pointer), ; Pointer (01h, physical collection)
+ * Collection (Physical),
+ * Usage Page (Button), ; Button (09h)
+ * Usage Minimum (01h),
+ * Usage Maximum (03h),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Count (3),
+ * Report Size (1),
+ * Input (Variable),
+ * Report Count (5),
+ * Input (Constant),
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (X), ; X (30h, dynamic value)
+ * Usage (Y), ; Y (31h, dynamic value)
+ * Logical Minimum (0),
+ * Logical Maximum (32767),
+ * Physical Minimum (0),
+ * Physical Maximum (32767),
+ * Report Count (2),
+ * Report Size (16),
+ * Input (Variable),
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Tip Pressure), ; Tip pressure (30h, dynamic value)
+ * Logical Maximum (1023),
+ * Report Count (1),
+ * Report Size (16),
+ * Input (Variable),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (00h),
+ * Collection (Application),
+ * Report ID (4),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Usage (00h),
+ * Report Size (8),
+ * Report Count (3),
+ * Feature (Variable),
+ * End Collection
+ */
+
+/* Size of the original descriptor of PF1209 tablet */
+#define PF1209_RDESC_ORIG_SIZE 234
+
+/*
+ * Fixed PF1209 report descriptor
+ *
+ * The descriptor is fixed similarly to WP5540U and WP8060U, plus the
+ * feature report is removed, because its purpose is unknown and it is of no
+ * use to the generic HID driver anyway for now.
+ */
+static __u8 pf1209_rdesc_fixed[] = {
+ 0x05, 0x0D, /* Usage Page (Digitizer), */
+ 0x09, 0x02, /* Usage (Pen), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x09, /* Report ID (9), */
+ 0x09, 0x20, /* Usage (Stylus), */
+ 0xA0, /* Collection (Physical), */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x09, 0x42, /* Usage (Tip Switch), */
+ 0x09, 0x44, /* Usage (Barrel Switch), */
+ 0x09, 0x46, /* Usage (Tablet Pick), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x95, 0x05, /* Report Count (5), */
+ 0x81, 0x01, /* Input (Constant), */
+ 0x75, 0x10, /* Report Size (16), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x14, /* Logical Minimum (0), */
+ 0xA4, /* Push, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x55, 0xFD, /* Unit Exponent (-3), */
+ 0x65, 0x13, /* Unit (Inch), */
+ 0x34, /* Physical Minimum (0), */
+ 0x09, 0x30, /* Usage (X), */
+ 0x46, 0xE0, 0x2E, /* Physical Maximum (12000), */
+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x09, 0x31, /* Usage (Y), */
+ 0x46, 0x28, 0x23, /* Physical Maximum (9000), */
+ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xB4, /* Pop, */
+ 0x09, 0x30, /* Usage (Tip Pressure), */
+ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xC0, /* End Collection, */
+ 0xC0, /* End Collection, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x09, 0x02, /* Usage (Mouse), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x08, /* Report ID (8), */
+ 0x09, 0x01, /* Usage (Pointer), */
+ 0xA0, /* Collection (Physical), */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x05, 0x09, /* Usage Page (Button), */
+ 0x19, 0x01, /* Usage Minimum (01h), */
+ 0x29, 0x03, /* Usage Maximum (03h), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x95, 0x05, /* Report Count (5), */
+ 0x81, 0x01, /* Input (Constant), */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x75, 0x08, /* Report Size (8), */
+ 0x09, 0x30, /* Usage (X), */
+ 0x09, 0x31, /* Usage (Y), */
+ 0x15, 0x81, /* Logical Minimum (-127), */
+ 0x25, 0x7F, /* Logical Maximum (127), */
+ 0x95, 0x02, /* Report Count (2), */
+ 0x81, 0x06, /* Input (Variable, Relative), */
+ 0x09, 0x38, /* Usage (Wheel), */
+ 0x15, 0xFF, /* Logical Minimum (-1), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x81, 0x06, /* Input (Variable, Relative), */
+ 0x81, 0x01, /* Input (Constant), */
+ 0xC0, /* End Collection, */
+ 0xC0 /* End Collection */
+};
+
+static __u8 *uclogic_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
+{
+ switch (hdev->product) {
+ case USB_DEVICE_ID_UCLOGIC_TABLET_PF1209:
+ if (*rsize == PF1209_RDESC_ORIG_SIZE) {
+ rdesc = pf1209_rdesc_fixed;
+ *rsize = sizeof(pf1209_rdesc_fixed);
+ }
+ break;
+ case USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U:
+ if (*rsize == WPXXXXU_RDESC_ORIG_SIZE) {
+ rdesc = wp4030u_rdesc_fixed;
+ *rsize = sizeof(wp4030u_rdesc_fixed);
+ }
+ break;
+ case USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U:
+ if (*rsize == WPXXXXU_RDESC_ORIG_SIZE) {
+ rdesc = wp5540u_rdesc_fixed;
+ *rsize = sizeof(wp5540u_rdesc_fixed);
+ }
+ break;
+ case USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U:
+ if (*rsize == WPXXXXU_RDESC_ORIG_SIZE) {
+ rdesc = wp8060u_rdesc_fixed;
+ *rsize = sizeof(wp8060u_rdesc_fixed);
+ }
+ break;
+ }
+
+ return rdesc;
+}
+
+static const struct hid_device_id uclogic_devices[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
+ USB_DEVICE_ID_UCLOGIC_TABLET_PF1209) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
+ USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
+ USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
+ USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U) },
+ { }
+};
+MODULE_DEVICE_TABLE(hid, uclogic_devices);
+
+static struct hid_driver uclogic_driver = {
+ .name = "uclogic",
+ .id_table = uclogic_devices,
+ .report_fixup = uclogic_report_fixup,
+};
+
+static int __init uclogic_init(void)
+{
+ return hid_register_driver(&uclogic_driver);
+}
+
+static void __exit uclogic_exit(void)
+{
+ hid_unregister_driver(&uclogic_driver);
+}
+
+module_init(uclogic_init);
+module_exit(uclogic_exit);
+MODULE_LICENSE("GPL");
diff --git a/drivers/hid/hid-waltop.c b/drivers/hid/hid-waltop.c
new file mode 100644
index 0000000..b3a4163
--- /dev/null
+++ b/drivers/hid/hid-waltop.c
@@ -0,0 +1,1099 @@
+/*
+ * HID driver for Waltop devices not fully compliant with HID standard
+ *
+ * Copyright (c) 2010 Nikolai Kondrashov
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/hid.h>
+#include <linux/module.h>
+
+#include "hid-ids.h"
+
+/*
+ * There exists an official driver on the manufacturer's website, which
+ * wasn't submitted to the kernel, for some reason. The official driver
+ * doesn't seem to support extra features of some tablets, like wheels.
+ *
+ * It shows that the feature report ID 2 could be used to control any waltop
+ * tablet input mode, switching it between "default", "tablet" and "ink".
+ *
+ * This driver only uses "default" mode for all the supported tablets. This
+ * mode tries to be HID-compatible (not very successfully), but cripples the
+ * resolution of some tablets.
+ *
+ * The "tablet" mode uses some proprietary, yet decipherable protocol, which
+ * represents the correct resolution, but is possibly HID-incompatible (i.e.
+ * indescribable by a report descriptor).
+ *
+ * The purpose of the "ink" mode is unknown.
+ *
+ * The feature reports needed for switching to each mode are these:
+ *
+ * 02 16 00 default
+ * 02 16 01 tablet
+ * 02 16 02 ink
+ */
+
+/*
+ * Original Slim Tablet 5.8 inch report descriptor.
+ *
+ * All the reports except the report with ID 16 (the stylus) are unused,
+ * possibly because the tablet is not configured to, or because they were
+ * just copied from a more capable model. The full purpose of features
+ * described for report ID 2 is unknown.
+ *
+ * The stylus buttons are described as three bit fields, whereas actually
+ * it's an "array", i.e. they're reported as button numbers (1, 2 and 3).
+ * The "eraser" field is not used. There is also a "push" without a "pop" in
+ * the stylus description.
+ *
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (Mouse), ; Mouse (02h, application collection)
+ * Collection (Application),
+ * Report ID (1),
+ * Usage (Pointer), ; Pointer (01h, physical collection)
+ * Collection (Physical),
+ * Usage Page (Button), ; Button (09h)
+ * Usage Minimum (01h),
+ * Usage Maximum (05h),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (5),
+ * Input (Variable),
+ * Report Size (3),
+ * Report Count (1),
+ * Input (Constant, Variable),
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (X), ; X (30h, dynamic value)
+ * Usage (Y), ; Y (31h, dynamic value)
+ * Usage (Wheel), ; Wheel (38h, dynamic value)
+ * Logical Minimum (-127),
+ * Logical Maximum (127),
+ * Report Size (8),
+ * Report Count (3),
+ * Input (Variable, Relative),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Pen), ; Pen (02h, application collection)
+ * Collection (Application),
+ * Report ID (2),
+ * Usage (Stylus), ; Stylus (20h, logical collection)
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (7),
+ * Input (Variable),
+ * Usage (Azimuth), ; Azimuth (3Fh, dynamic value)
+ * Usage (Altitude), ; Altitude (40h, dynamic value)
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (2),
+ * Feature (Variable),
+ * End Collection,
+ * Report ID (5),
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Stylus), ; Stylus (20h, logical collection)
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (7),
+ * Input (Variable),
+ * End Collection,
+ * Report ID (10),
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Stylus), ; Stylus (20h, logical collection)
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (3),
+ * Input (Variable),
+ * End Collection,
+ * Report ID (16),
+ * Usage (Stylus), ; Stylus (20h, logical collection)
+ * Collection (Physical),
+ * Usage (Tip Switch), ; Tip switch (42h, momentary control)
+ * Usage (Barrel Switch), ; Barrel switch (44h, momentary control)
+ * Usage (Invert), ; Invert (3Ch, momentary control)
+ * Usage (Eraser), ; Eraser (45h, momentary control)
+ * Usage (In Range), ; In range (32h, momentary control)
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (5),
+ * Input (Variable),
+ * Report Count (3),
+ * Input (Constant, Variable),
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (X), ; X (30h, dynamic value)
+ * Report Size (16),
+ * Report Count (1),
+ * Push,
+ * Unit Exponent (13),
+ * Unit (Inch^3),
+ * Logical Minimum (0),
+ * Logical Maximum (10000),
+ * Physical Minimum (0),
+ * Physical Maximum (10000),
+ * Input (Variable),
+ * Usage (Y), ; Y (31h, dynamic value)
+ * Logical Maximum (6000),
+ * Physical Maximum (6000),
+ * Input (Variable),
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Tip Pressure), ; Tip pressure (30h, dynamic value)
+ * Logical Minimum (0),
+ * Logical Maximum (1023),
+ * Physical Minimum (0),
+ * Physical Maximum (1023),
+ * Input (Variable),
+ * End Collection,
+ * End Collection
+ */
+
+/* Size of the original report descriptor of Slim Tablet 5.8 inch */
+#define SLIM_TABLET_5_8_INCH_RDESC_ORIG_SIZE 222
+
+/*
+ * Fixed Slim Tablet 5.8 inch descriptor.
+ *
+ * All the reports except the stylus report (ID 16) were removed as unused.
+ * The stylus buttons description was fixed.
+ */
+static __u8 slim_tablet_5_8_inch_rdesc_fixed[] = {
+ 0x05, 0x0D, /* Usage Page (Digitizer), */
+ 0x09, 0x02, /* Usage (Pen), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x10, /* Report ID (16), */
+ 0x09, 0x20, /* Usage (Stylus), */
+ 0xA0, /* Collection (Physical), */
+ 0x09, 0x42, /* Usage (Tip Switch), */
+ 0x09, 0x44, /* Usage (Barrel Switch), */
+ 0x09, 0x46, /* Usage (Tablet Pick), */
+ 0x15, 0x01, /* Logical Minimum (1), */
+ 0x25, 0x03, /* Logical Maximum (3), */
+ 0x75, 0x04, /* Report Size (4), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x80, /* Input, */
+ 0x09, 0x32, /* Usage (In Range), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0x75, 0x10, /* Report Size (16), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x14, /* Logical Minimum (0), */
+ 0xA4, /* Push, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x65, 0x13, /* Unit (Inch), */
+ 0x55, 0xFD, /* Unit Exponent (-3), */
+ 0x34, /* Physical Minimum (0), */
+ 0x09, 0x30, /* Usage (X), */
+ 0x46, 0x88, 0x13, /* Physical Maximum (5000), */
+ 0x26, 0x10, 0x27, /* Logical Maximum (10000), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x09, 0x31, /* Usage (Y), */
+ 0x46, 0xB8, 0x0B, /* Physical Maximum (3000), */
+ 0x26, 0x70, 0x17, /* Logical Maximum (6000), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xB4, /* Pop, */
+ 0x09, 0x30, /* Usage (Tip Pressure), */
+ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xC0, /* End Collection, */
+ 0xC0 /* End Collection */
+};
+
+/*
+ * Original Slim Tablet 12.1 inch report descriptor.
+ *
+ * The descriptor is similar to the Slim Tablet 5.8 inch descriptor with the
+ * addition of a keyboard report, seemingly unused. It may have get here
+ * from a Media Tablet - probably an unimplemented feature.
+ *
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (Mouse), ; Mouse (02h, application collection)
+ * Collection (Application),
+ * Report ID (1),
+ * Usage (Pointer), ; Pointer (01h, physical collection)
+ * Collection (Physical),
+ * Usage Page (Button), ; Button (09h)
+ * Usage Minimum (01h),
+ * Usage Maximum (05h),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (5),
+ * Input (Variable),
+ * Report Size (3),
+ * Report Count (1),
+ * Input (Constant, Variable),
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (X), ; X (30h, dynamic value)
+ * Usage (Y), ; Y (31h, dynamic value)
+ * Usage (Wheel), ; Wheel (38h, dynamic value)
+ * Logical Minimum (-127),
+ * Logical Maximum (127),
+ * Report Size (8),
+ * Report Count (3),
+ * Input (Variable, Relative),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Pen), ; Pen (02h, application collection)
+ * Collection (Application),
+ * Report ID (2),
+ * Usage (Stylus), ; Stylus (20h, logical collection)
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (7),
+ * Input (Variable),
+ * Usage (Azimuth), ; Azimuth (3Fh, dynamic value)
+ * Usage (Altitude), ; Altitude (40h, dynamic value)
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (2),
+ * Feature (Variable),
+ * End Collection,
+ * Report ID (5),
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Stylus), ; Stylus (20h, logical collection)
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (7),
+ * Input (Variable),
+ * End Collection,
+ * Report ID (10),
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Stylus), ; Stylus (20h, logical collection)
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (3),
+ * Input (Variable),
+ * End Collection,
+ * Report ID (16),
+ * Usage (Stylus), ; Stylus (20h, logical collection)
+ * Collection (Physical),
+ * Usage (Tip Switch), ; Tip switch (42h, momentary control)
+ * Usage (Barrel Switch), ; Barrel switch (44h, momentary control)
+ * Usage (Invert), ; Invert (3Ch, momentary control)
+ * Usage (Eraser), ; Eraser (45h, momentary control)
+ * Usage (In Range), ; In range (32h, momentary control)
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (5),
+ * Input (Variable),
+ * Report Count (3),
+ * Input (Constant, Variable),
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (X), ; X (30h, dynamic value)
+ * Report Size (16),
+ * Report Count (1),
+ * Push,
+ * Unit Exponent (13),
+ * Unit (Inch^3),
+ * Logical Minimum (0),
+ * Logical Maximum (20000),
+ * Physical Minimum (0),
+ * Physical Maximum (20000),
+ * Input (Variable),
+ * Usage (Y), ; Y (31h, dynamic value)
+ * Logical Maximum (12500),
+ * Physical Maximum (12500),
+ * Input (Variable),
+ * Usage Page (Digitizer), ; Digitizer (0Dh)
+ * Usage (Tip Pressure), ; Tip pressure (30h, dynamic value)
+ * Logical Minimum (0),
+ * Logical Maximum (1023),
+ * Physical Minimum (0),
+ * Physical Maximum (1023),
+ * Input (Variable),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Desktop), ; Generic desktop controls (01h)
+ * Usage (Keyboard), ; Keyboard (06h, application collection)
+ * Collection (Application),
+ * Report ID (13),
+ * Usage Page (Keyboard), ; Keyboard/keypad (07h)
+ * Usage Minimum (KB Leftcontrol), ; Keyboard left control
+ * ; (E0h, dynamic value)
+ * Usage Maximum (KB Right GUI), ; Keyboard right GUI (E7h, dynamic value)
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (8),
+ * Input (Variable),
+ * Report Size (8),
+ * Report Count (1),
+ * Input (Constant),
+ * Usage Page (Keyboard), ; Keyboard/keypad (07h)
+ * Usage Minimum (None), ; No event (00h, selector)
+ * Usage Maximum (KB Application), ; Keyboard Application (65h, selector)
+ * Logical Minimum (0),
+ * Logical Maximum (101),
+ * Report Size (8),
+ * Report Count (5),
+ * Input,
+ * End Collection
+ */
+
+/* Size of the original report descriptor of Slim Tablet 12.1 inch */
+#define SLIM_TABLET_12_1_INCH_RDESC_ORIG_SIZE 269
+
+/*
+ * Fixed Slim Tablet 12.1 inch descriptor.
+ *
+ * All the reports except the stylus report (ID 16) were removed as unused.
+ * The stylus buttons description was fixed.
+ */
+static __u8 slim_tablet_12_1_inch_rdesc_fixed[] = {
+ 0x05, 0x0D, /* Usage Page (Digitizer), */
+ 0x09, 0x02, /* Usage (Pen), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x10, /* Report ID (16), */
+ 0x09, 0x20, /* Usage (Stylus), */
+ 0xA0, /* Collection (Physical), */
+ 0x09, 0x42, /* Usage (Tip Switch), */
+ 0x09, 0x44, /* Usage (Barrel Switch), */
+ 0x09, 0x46, /* Usage (Tablet Pick), */
+ 0x15, 0x01, /* Logical Minimum (1), */
+ 0x25, 0x03, /* Logical Maximum (3), */
+ 0x75, 0x04, /* Report Size (4), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x80, /* Input, */
+ 0x09, 0x32, /* Usage (In Range), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0x75, 0x10, /* Report Size (16), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x14, /* Logical Minimum (0), */
+ 0xA4, /* Push, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x65, 0x13, /* Unit (Inch), */
+ 0x55, 0xFD, /* Unit Exponent (-3), */
+ 0x34, /* Physical Minimum (0), */
+ 0x09, 0x30, /* Usage (X), */
+ 0x46, 0x10, 0x27, /* Physical Maximum (10000), */
+ 0x26, 0x20, 0x4E, /* Logical Maximum (20000), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x09, 0x31, /* Usage (Y), */
+ 0x46, 0x6A, 0x18, /* Physical Maximum (6250), */
+ 0x26, 0xD4, 0x30, /* Logical Maximum (12500), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xB4, /* Pop, */
+ 0x09, 0x30, /* Usage (Tip Pressure), */
+ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xC0, /* End Collection, */
+ 0xC0 /* End Collection */
+};
+
+/*
+ * Original Media Tablet 10.6 inch report descriptor.
+ *
+ * There are at least two versions of this model in the wild. They are
+ * represented by Genius G-Pen M609 (older version) and Genius G-Pen M609X
+ * (newer version).
+ *
+ * Both versions have the usual pen with two barrel buttons and two
+ * identical wheels with center buttons in the top corners of the tablet
+ * base. They also have buttons on the top, between the wheels, for
+ * selecting the wheels' functions and wide/standard mode. In the wide mode
+ * the whole working surface is sensed, in the standard mode a narrower area
+ * is sensed, but the logical report extents remain the same. These modes
+ * correspond roughly to 16:9 and 4:3 aspect ratios respectively.
+ *
+ * The older version has three wheel function buttons ("scroll", "zoom" and
+ * "volume") and two separate buttons for wide and standard mode. The newer
+ * version has four wheel function buttons (plus "brush") and only one
+ * button is used for selecting wide/standard mode. So, the total number of
+ * buttons remains the same, but one of the mode buttons is repurposed as a
+ * wheels' function button in the newer version.
+ *
+ * The wheel functions are:
+ * scroll - the wheels act as scroll wheels, the center buttons switch
+ * between vertical and horizontal scrolling;
+ * zoom - the wheels zoom in/out, the buttons supposedly reset to 100%;
+ * volume - the wheels control the sound volume, the buttons mute;
+ * brush - the wheels are supposed to control brush width in a graphics
+ * editor, the buttons do nothing.
+ *
+ * Below is the newer version's report descriptor. It may very well be that
+ * the older version's descriptor is different and thus it won't be
+ * supported.
+ *
+ * The mouse report (ID 1) only uses the wheel field for reporting the tablet
+ * wheels' scroll mode. The keyboard report (ID 13) is used to report the
+ * wheels' zoom and brush control functions as key presses. The report ID 12
+ * is used to report the wheels' volume control functions. The stylus report
+ * (ID 16) has the same problems as the Slim Tablet 5.8 inch report has.
+ *
+ * The rest of the reports are unused, at least in the default configuration.
+ * The purpose of the features is unknown.
+ *
+ * Usage Page (Desktop),
+ * Usage (Mouse),
+ * Collection (Application),
+ * Report ID (1),
+ * Usage (Pointer),
+ * Collection (Physical),
+ * Usage Page (Button),
+ * Usage Minimum (01h),
+ * Usage Maximum (05h),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (5),
+ * Input (Variable),
+ * Report Size (3),
+ * Report Count (1),
+ * Input (Constant, Variable),
+ * Usage Page (Desktop),
+ * Usage (X),
+ * Usage (Y),
+ * Usage (Wheel),
+ * Logical Minimum (-127),
+ * Logical Maximum (127),
+ * Report Size (8),
+ * Report Count (3),
+ * Input (Variable, Relative),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Digitizer),
+ * Usage (Pen),
+ * Collection (Application),
+ * Report ID (2),
+ * Usage (Stylus),
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (7),
+ * Input (Variable),
+ * Usage (Azimuth),
+ * Usage (Altitude),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (2),
+ * Feature (Variable),
+ * End Collection,
+ * Report ID (5),
+ * Usage Page (Digitizer),
+ * Usage (Stylus),
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (7),
+ * Input (Variable),
+ * End Collection,
+ * Report ID (10),
+ * Usage Page (Digitizer),
+ * Usage (Stylus),
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (7),
+ * Input (Variable),
+ * End Collection,
+ * Report ID (16),
+ * Usage (Stylus),
+ * Collection (Physical),
+ * Usage (Tip Switch),
+ * Usage (Barrel Switch),
+ * Usage (Invert),
+ * Usage (Eraser),
+ * Usage (In Range),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (5),
+ * Input (Variable),
+ * Report Count (3),
+ * Input (Constant, Variable),
+ * Usage Page (Desktop),
+ * Usage (X),
+ * Report Size (16),
+ * Report Count (1),
+ * Push,
+ * Unit Exponent (13),
+ * Unit (Inch^3),
+ * Logical Minimum (0),
+ * Logical Maximum (18000),
+ * Physical Minimum (0),
+ * Physical Maximum (18000),
+ * Input (Variable),
+ * Usage (Y),
+ * Logical Maximum (11000),
+ * Physical Maximum (11000),
+ * Input (Variable),
+ * Usage Page (Digitizer),
+ * Usage (Tip Pressure),
+ * Logical Minimum (0),
+ * Logical Maximum (1023),
+ * Physical Minimum (0),
+ * Physical Maximum (1023),
+ * Input (Variable),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Desktop),
+ * Usage (Keyboard),
+ * Collection (Application),
+ * Report ID (13),
+ * Usage Page (Keyboard),
+ * Usage Minimum (KB Leftcontrol),
+ * Usage Maximum (KB Right GUI),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (8),
+ * Input (Variable),
+ * Report Size (8),
+ * Report Count (1),
+ * Input (Constant),
+ * Usage Page (Keyboard),
+ * Usage Minimum (None),
+ * Usage Maximum (KB Application),
+ * Logical Minimum (0),
+ * Logical Maximum (101),
+ * Report Size (8),
+ * Report Count (5),
+ * Input,
+ * End Collection,
+ * Usage Page (Consumer),
+ * Usage (Consumer Control),
+ * Collection (Application),
+ * Report ID (12),
+ * Usage (Volume Inc),
+ * Usage (Volume Dec),
+ * Usage (Mute),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (3),
+ * Input (Variable, Relative),
+ * Report Size (5),
+ * Report Count (1),
+ * Input (Constant, Variable, Relative),
+ * End Collection
+ */
+
+/* Size of the original report descriptor of Media Tablet 10.6 inch */
+#define MEDIA_TABLET_10_6_INCH_RDESC_ORIG_SIZE 300
+
+/*
+ * Fixed Media Tablet 10.6 inch descriptor.
+ *
+ * The descriptions of reports unused in the default configuration are
+ * removed. The stylus report (ID 16) is fixed similarly to Slim Tablet 5.8
+ * inch. The unused mouse report (ID 1) fields are replaced with constant
+ * padding.
+ *
+ * The keyboard report (ID 13) is hacked to instead have an "array" field
+ * reporting consumer page controls, and all the unused bits are masked out
+ * with constant padding. The "brush" wheels' function is represented as "Scan
+ * Previous/Next Track" controls due to the lack of brush controls in the
+ * usage tables specification.
+ */
+static __u8 media_tablet_10_6_inch_rdesc_fixed[] = {
+ 0x05, 0x0D, /* Usage Page (Digitizer), */
+ 0x09, 0x02, /* Usage (Pen), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x10, /* Report ID (16), */
+ 0x09, 0x20, /* Usage (Stylus), */
+ 0xA0, /* Collection (Physical), */
+ 0x09, 0x42, /* Usage (Tip Switch), */
+ 0x09, 0x44, /* Usage (Barrel Switch), */
+ 0x09, 0x46, /* Usage (Tablet Pick), */
+ 0x15, 0x01, /* Logical Minimum (1), */
+ 0x25, 0x03, /* Logical Maximum (3), */
+ 0x75, 0x04, /* Report Size (4), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x80, /* Input, */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x09, 0x32, /* Usage (In Range), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0x75, 0x10, /* Report Size (16), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x14, /* Logical Minimum (0), */
+ 0xA4, /* Push, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x65, 0x13, /* Unit (Inch), */
+ 0x55, 0xFD, /* Unit Exponent (-3), */
+ 0x34, /* Physical Minimum (0), */
+ 0x09, 0x30, /* Usage (X), */
+ 0x46, 0x28, 0x23, /* Physical Maximum (9000), */
+ 0x26, 0x50, 0x46, /* Logical Maximum (18000), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x09, 0x31, /* Usage (Y), */
+ 0x46, 0x7C, 0x15, /* Physical Maximum (5500), */
+ 0x26, 0xF8, 0x2A, /* Logical Maximum (11000), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xB4, /* Pop, */
+ 0x09, 0x30, /* Usage (Tip Pressure), */
+ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xC0, /* End Collection, */
+ 0xC0, /* End Collection, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x09, 0x02, /* Usage (Mouse), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x01, /* Report ID (1), */
+ 0x09, 0x01, /* Usage (Pointer), */
+ 0xA0, /* Collection (Physical), */
+ 0x75, 0x08, /* Report Size (8), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0x95, 0x02, /* Report Count (2), */
+ 0x15, 0xFF, /* Logical Minimum (-1), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x09, 0x38, /* Usage (Wheel), */
+ 0x0B, 0x38, 0x02, /* Usage (Consumer AC Pan), */
+ 0x0C, 0x00,
+ 0x81, 0x06, /* Input (Variable, Relative), */
+ 0x95, 0x02, /* Report Count (2), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0xC0, /* End Collection, */
+ 0xC0, /* End Collection, */
+ 0x05, 0x0C, /* Usage Page (Consumer), */
+ 0x09, 0x01, /* Usage (Consumer Control), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x0D, /* Report ID (13), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x75, 0x10, /* Report Size (16), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0x0A, 0x2F, 0x02, /* Usage (AC Zoom), */
+ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */
+ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */
+ 0x09, 0xB6, /* Usage (Scan Previous Track), */
+ 0x09, 0xB5, /* Usage (Scan Next Track), */
+ 0x08, /* Usage (00h), */
+ 0x08, /* Usage (00h), */
+ 0x08, /* Usage (00h), */
+ 0x08, /* Usage (00h), */
+ 0x08, /* Usage (00h), */
+ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */
+ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */
+ 0x15, 0x0C, /* Logical Minimum (12), */
+ 0x25, 0x17, /* Logical Maximum (23), */
+ 0x75, 0x05, /* Report Size (5), */
+ 0x80, /* Input, */
+ 0x75, 0x03, /* Report Size (3), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0x75, 0x20, /* Report Size (32), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0xC0, /* End Collection, */
+ 0x09, 0x01, /* Usage (Consumer Control), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x0C, /* Report ID (12), */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x09, 0xE9, /* Usage (Volume Inc), */
+ 0x09, 0xEA, /* Usage (Volume Dec), */
+ 0x09, 0xE2, /* Usage (Mute), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x06, /* Input (Variable, Relative), */
+ 0x95, 0x35, /* Report Count (53), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0xC0 /* End Collection */
+};
+
+/*
+ * Original Media Tablet 14.1 inch report descriptor.
+ *
+ * There are at least two versions of this model in the wild. They are
+ * represented by Genius G-Pen M712 (older version) and Genius G-Pen M712X
+ * (newer version). The hardware difference between these versions is the same
+ * as between older and newer versions of Media Tablet 10.6 inch. The report
+ * descriptors are identical for both versions.
+ *
+ * The function, behavior and report descriptor of this tablet is similar to
+ * that of Media Tablet 10.6 inch. However, there is one more field (with
+ * Consumer AC Pan usage) in the mouse description. Then the tablet X and Y
+ * logical extents both get scaled to 0..16383 range (a hardware limit?),
+ * which kind of defeats the advertised 4000 LPI resolution, considering the
+ * physical extents of 12x7.25 inches. Plus, reports 5, 10 and 255 are used
+ * sometimes (while moving the pen) with unknown purpose. Also, the key codes
+ * generated for zoom in/out are different.
+ *
+ * Usage Page (Desktop),
+ * Usage (Mouse),
+ * Collection (Application),
+ * Report ID (1),
+ * Usage (Pointer),
+ * Collection (Physical),
+ * Usage Page (Button),
+ * Usage Minimum (01h),
+ * Usage Maximum (05h),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (5),
+ * Input (Variable),
+ * Report Size (3),
+ * Report Count (1),
+ * Input (Constant, Variable),
+ * Usage Page (Desktop),
+ * Usage (X),
+ * Usage (Y),
+ * Usage (Wheel),
+ * Logical Minimum (-127),
+ * Logical Maximum (127),
+ * Report Size (8),
+ * Report Count (3),
+ * Input (Variable, Relative),
+ * Usage Page (Consumer),
+ * Logical Minimum (-127),
+ * Logical Maximum (127),
+ * Report Size (8),
+ * Report Count (1),
+ * Usage (AC Pan),
+ * Input (Variable, Relative),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Digitizer),
+ * Usage (Pen),
+ * Collection (Application),
+ * Report ID (2),
+ * Usage (Stylus),
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (7),
+ * Input (Variable),
+ * Usage (Azimuth),
+ * Usage (Altitude),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (2),
+ * Feature (Variable),
+ * End Collection,
+ * Report ID (5),
+ * Usage Page (Digitizer),
+ * Usage (Stylus),
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (7),
+ * Input (Variable),
+ * End Collection,
+ * Report ID (10),
+ * Usage Page (Digitizer),
+ * Usage (Stylus),
+ * Collection (Physical),
+ * Usage (00h),
+ * Logical Minimum (0),
+ * Logical Maximum (255),
+ * Report Size (8),
+ * Report Count (7),
+ * Input (Variable),
+ * End Collection,
+ * Report ID (16),
+ * Usage (Stylus),
+ * Collection (Physical),
+ * Usage (Tip Switch),
+ * Usage (Barrel Switch),
+ * Usage (Invert),
+ * Usage (Eraser),
+ * Usage (In Range),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (5),
+ * Input (Variable),
+ * Report Count (3),
+ * Input (Constant, Variable),
+ * Usage Page (Desktop),
+ * Usage (X),
+ * Report Size (16),
+ * Report Count (1),
+ * Push,
+ * Unit Exponent (13),
+ * Unit (Inch^3),
+ * Logical Minimum (0),
+ * Logical Maximum (16383),
+ * Physical Minimum (0),
+ * Physical Maximum (16383),
+ * Input (Variable),
+ * Usage (Y),
+ * Input (Variable),
+ * Usage Page (Digitizer),
+ * Usage (Tip Pressure),
+ * Logical Minimum (0),
+ * Logical Maximum (1023),
+ * Physical Minimum (0),
+ * Physical Maximum (1023),
+ * Input (Variable),
+ * End Collection,
+ * End Collection,
+ * Usage Page (Desktop),
+ * Usage (Keyboard),
+ * Collection (Application),
+ * Report ID (13),
+ * Usage Page (Keyboard),
+ * Usage Minimum (KB Leftcontrol),
+ * Usage Maximum (KB Right GUI),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (8),
+ * Input (Variable),
+ * Report Size (8),
+ * Report Count (1),
+ * Input (Constant),
+ * Usage Page (Keyboard),
+ * Usage Minimum (None),
+ * Usage Maximum (KB Application),
+ * Logical Minimum (0),
+ * Logical Maximum (101),
+ * Report Size (8),
+ * Report Count (5),
+ * Input,
+ * End Collection,
+ * Usage Page (Consumer),
+ * Usage (Consumer Control),
+ * Collection (Application),
+ * Report ID (12),
+ * Usage (Volume Inc),
+ * Usage (Volume Dec),
+ * Usage (Mute),
+ * Logical Minimum (0),
+ * Logical Maximum (1),
+ * Report Size (1),
+ * Report Count (3),
+ * Input (Variable, Relative),
+ * Report Size (5),
+ * Report Count (1),
+ * Input (Constant, Variable, Relative),
+ * End Collection
+ */
+
+/* Size of the original report descriptor of Media Tablet 14.1 inch */
+#define MEDIA_TABLET_14_1_INCH_RDESC_ORIG_SIZE 309
+
+/*
+ * Fixed Media Tablet 14.1 inch descriptor.
+ * It is fixed similarly to the Media Tablet 10.6 inch descriptor.
+ */
+static __u8 media_tablet_14_1_inch_rdesc_fixed[] = {
+ 0x05, 0x0D, /* Usage Page (Digitizer), */
+ 0x09, 0x02, /* Usage (Pen), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x10, /* Report ID (16), */
+ 0x09, 0x20, /* Usage (Stylus), */
+ 0xA0, /* Collection (Physical), */
+ 0x09, 0x42, /* Usage (Tip Switch), */
+ 0x09, 0x44, /* Usage (Barrel Switch), */
+ 0x09, 0x46, /* Usage (Tablet Pick), */
+ 0x15, 0x01, /* Logical Minimum (1), */
+ 0x25, 0x03, /* Logical Maximum (3), */
+ 0x75, 0x04, /* Report Size (4), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x80, /* Input, */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x09, 0x32, /* Usage (In Range), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0x75, 0x10, /* Report Size (16), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x14, /* Logical Minimum (0), */
+ 0xA4, /* Push, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x65, 0x13, /* Unit (Inch), */
+ 0x55, 0xFD, /* Unit Exponent (-3), */
+ 0x34, /* Physical Minimum (0), */
+ 0x09, 0x30, /* Usage (X), */
+ 0x46, 0xE0, 0x2E, /* Physical Maximum (12000), */
+ 0x26, 0xFF, 0x3F, /* Logical Maximum (16383), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0x09, 0x31, /* Usage (Y), */
+ 0x46, 0x52, 0x1C, /* Physical Maximum (7250), */
+ 0x26, 0xFF, 0x3F, /* Logical Maximum (16383), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xB4, /* Pop, */
+ 0x09, 0x30, /* Usage (Tip Pressure), */
+ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */
+ 0x81, 0x02, /* Input (Variable), */
+ 0xC0, /* End Collection, */
+ 0xC0, /* End Collection, */
+ 0x05, 0x01, /* Usage Page (Desktop), */
+ 0x09, 0x02, /* Usage (Mouse), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x01, /* Report ID (1), */
+ 0x09, 0x01, /* Usage (Pointer), */
+ 0xA0, /* Collection (Physical), */
+ 0x75, 0x08, /* Report Size (8), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0x95, 0x02, /* Report Count (2), */
+ 0x15, 0xFF, /* Logical Minimum (-1), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x09, 0x38, /* Usage (Wheel), */
+ 0x0B, 0x38, 0x02, /* Usage (Consumer AC Pan), */
+ 0x0C, 0x00,
+ 0x81, 0x06, /* Input (Variable, Relative), */
+ 0xC0, /* End Collection, */
+ 0xC0, /* End Collection, */
+ 0x05, 0x0C, /* Usage Page (Consumer), */
+ 0x09, 0x01, /* Usage (Consumer Control), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x0D, /* Report ID (13), */
+ 0x95, 0x01, /* Report Count (1), */
+ 0x75, 0x10, /* Report Size (16), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0x0A, 0x2F, 0x02, /* Usage (AC Zoom), */
+ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */
+ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */
+ 0x09, 0xB6, /* Usage (Scan Previous Track), */
+ 0x09, 0xB5, /* Usage (Scan Next Track), */
+ 0x08, /* Usage (00h), */
+ 0x08, /* Usage (00h), */
+ 0x08, /* Usage (00h), */
+ 0x08, /* Usage (00h), */
+ 0x08, /* Usage (00h), */
+ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */
+ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */
+ 0x15, 0x0C, /* Logical Minimum (12), */
+ 0x25, 0x17, /* Logical Maximum (23), */
+ 0x75, 0x05, /* Report Size (5), */
+ 0x80, /* Input, */
+ 0x75, 0x03, /* Report Size (3), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0x75, 0x20, /* Report Size (32), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0xC0, /* End Collection, */
+ 0x09, 0x01, /* Usage (Consumer Control), */
+ 0xA1, 0x01, /* Collection (Application), */
+ 0x85, 0x0C, /* Report ID (12), */
+ 0x75, 0x01, /* Report Size (1), */
+ 0x09, 0xE9, /* Usage (Volume Inc), */
+ 0x09, 0xEA, /* Usage (Volume Dec), */
+ 0x09, 0xE2, /* Usage (Mute), */
+ 0x14, /* Logical Minimum (0), */
+ 0x25, 0x01, /* Logical Maximum (1), */
+ 0x95, 0x03, /* Report Count (3), */
+ 0x81, 0x06, /* Input (Variable, Relative), */
+ 0x75, 0x05, /* Report Size (5), */
+ 0x81, 0x03, /* Input (Constant, Variable), */
+ 0xC0 /* End Collection */
+};
+
+static __u8 *waltop_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
+{
+ switch (hdev->product) {
+ case USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH:
+ if (*rsize == SLIM_TABLET_5_8_INCH_RDESC_ORIG_SIZE) {
+ rdesc = slim_tablet_5_8_inch_rdesc_fixed;
+ *rsize = sizeof(slim_tablet_5_8_inch_rdesc_fixed);
+ }
+ break;
+ case USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH:
+ if (*rsize == SLIM_TABLET_12_1_INCH_RDESC_ORIG_SIZE) {
+ rdesc = slim_tablet_12_1_inch_rdesc_fixed;
+ *rsize = sizeof(slim_tablet_12_1_inch_rdesc_fixed);
+ }
+ break;
+ case USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH:
+ if (*rsize == MEDIA_TABLET_10_6_INCH_RDESC_ORIG_SIZE) {
+ rdesc = media_tablet_10_6_inch_rdesc_fixed;
+ *rsize = sizeof(media_tablet_10_6_inch_rdesc_fixed);
+ }
+ break;
+ case USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH:
+ if (*rsize == MEDIA_TABLET_14_1_INCH_RDESC_ORIG_SIZE) {
+ rdesc = media_tablet_14_1_inch_rdesc_fixed;
+ *rsize = sizeof(media_tablet_14_1_inch_rdesc_fixed);
+ }
+ break;
+ }
+ return rdesc;
+}
+
+static const struct hid_device_id waltop_devices[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP,
+ USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP,
+ USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP,
+ USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP,
+ USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH) },
+ { }
+};
+MODULE_DEVICE_TABLE(hid, waltop_devices);
+
+static struct hid_driver waltop_driver = {
+ .name = "waltop",
+ .id_table = waltop_devices,
+ .report_fixup = waltop_report_fixup,
+};
+
+static int __init waltop_init(void)
+{
+ return hid_register_driver(&waltop_driver);
+}
+
+static void __exit waltop_exit(void)
+{
+ hid_unregister_driver(&waltop_driver);
+}
+
+module_init(waltop_init);
+module_exit(waltop_exit);
+MODULE_LICENSE("GPL");
diff --git a/drivers/hid/hid-zydacron.c b/drivers/hid/hid-zydacron.c
index 9e8d35a..aac1f92 100644
--- a/drivers/hid/hid-zydacron.c
+++ b/drivers/hid/hid-zydacron.c
@@ -27,10 +27,10 @@
* Zydacron remote control has an invalid HID report descriptor,
* that needs fixing before we can parse it.
*/
-static void zc_report_fixup(struct hid_device *hdev, __u8 *rdesc,
- unsigned int rsize)
+static __u8 *zc_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
{
- if (rsize >= 253 &&
+ if (*rsize >= 253 &&
rdesc[0x96] == 0xbc && rdesc[0x97] == 0xff &&
rdesc[0xca] == 0xbc && rdesc[0xcb] == 0xff &&
rdesc[0xe1] == 0xbc && rdesc[0xe2] == 0xff) {
@@ -40,6 +40,7 @@
rdesc[0x96] = rdesc[0xca] = rdesc[0xe1] = 0x0c;
rdesc[0x97] = rdesc[0xcb] = rdesc[0xe2] = 0x00;
}
+ return rdesc;
}
#define zc_map_key_clear(c) \
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 925992f..8a4b32d 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -218,9 +218,13 @@
unsigned int minor = iminor(inode);
struct hidraw *dev;
struct hidraw_list *list = file->private_data;
+ int ret;
- if (!hidraw_table[minor])
- return -ENODEV;
+ mutex_lock(&minors_lock);
+ if (!hidraw_table[minor]) {
+ ret = -ENODEV;
+ goto unlock;
+ }
list_del(&list->node);
dev = hidraw_table[minor];
@@ -233,10 +237,12 @@
kfree(list->hidraw);
}
}
-
kfree(list);
+ ret = 0;
+unlock:
+ mutex_unlock(&minors_lock);
- return 0;
+ return ret;
}
static long hidraw_ioctl(struct file *file, unsigned int cmd,
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 599041a..5489eab 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -807,9 +807,10 @@
struct usb_host_interface *interface = intf->cur_altsetting;
int ret;
- if (usbhid->urbout) {
+ if (usbhid->urbout && report_type != HID_FEATURE_REPORT) {
int actual_length;
int skipped_report_id = 0;
+
if (buf[0] == 0x0) {
/* Don't send the Report ID */
buf++;
@@ -1469,9 +1470,6 @@
retval = usbhid_quirks_init(quirks_param);
if (retval)
goto usbhid_quirks_init_fail;
- retval = hiddev_init();
- if (retval)
- goto hiddev_init_fail;
retval = usb_register(&hid_driver);
if (retval)
goto usb_register_fail;
@@ -1479,8 +1477,6 @@
return 0;
usb_register_fail:
- hiddev_exit();
-hiddev_init_fail:
usbhid_quirks_exit();
usbhid_quirks_init_fail:
hid_unregister_driver(&hid_usb_driver);
@@ -1493,7 +1489,6 @@
static void __exit hid_exit(void)
{
usb_deregister(&hid_driver);
- hiddev_exit();
usbhid_quirks_exit();
hid_unregister_driver(&hid_usb_driver);
destroy_workqueue(resumption_waker);
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index f0260c6..2c18547 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -34,7 +34,6 @@
{ USB_VENDOR_ID_ALPS, USB_DEVICE_ID_IBM_GAMEPAD, HID_QUIRK_BADPAD },
{ USB_VENDOR_ID_CHIC, USB_DEVICE_ID_CHIC_GAMEPAD, HID_QUIRK_BADPAD },
{ USB_VENDOR_ID_DWAV, USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER, HID_QUIRK_MULTI_INPUT | HID_QUIRK_NOGET },
- { USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH, HID_QUIRK_MULTI_INPUT },
{ USB_VENDOR_ID_MOJO, USB_DEVICE_ID_RETRO_ADAPTER, HID_QUIRK_MULTI_INPUT },
{ USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_TOUCHSCREEN_MOSART, HID_QUIRK_MULTI_INPUT },
{ USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING, HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT },
@@ -63,6 +62,7 @@
{ USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK, HID_QUIRK_NOGET },
+ { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_AXIS_295, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS, HID_QUIRK_NOGET },
@@ -72,6 +72,10 @@
{ USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209, HID_QUIRK_MULTI_INPUT },
{ USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U, HID_QUIRK_MULTI_INPUT },
{ USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5, HID_QUIRK_MULTI_INPUT },
+ { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U, HID_QUIRK_MULTI_INPUT },
+ { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U, HID_QUIRK_MULTI_INPUT },
+ { USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH, HID_QUIRK_MULTI_INPUT },
+ { USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH, HID_QUIRK_MULTI_INPUT },
{ USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD, HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS },
{ USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_QUAD_USB_JOYPAD, HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT },
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index dfcb276..fedd88d 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -67,8 +67,6 @@
struct mutex thread_lock;
};
-static struct usb_driver hiddev_driver;
-
/*
* Find a report, given the report's type and ID. The ID can be specified
* indirectly by REPORT_ID_FIRST (which returns the first report of the given
@@ -926,41 +924,3 @@
kfree(hiddev);
}
}
-
-/* Currently this driver is a USB driver. It's not a conventional one in
- * the sense that it doesn't probe at the USB level. Instead it waits to
- * be connected by HID through the hiddev_connect / hiddev_disconnect
- * routines. The reason to register as a USB device is to gain part of the
- * minor number space from the USB major.
- *
- * In theory, should the HID code be generalized to more than one physical
- * medium (say, IEEE 1384), this driver will probably need to register its
- * own major number, and in doing so, no longer need to register with USB.
- * At that point the probe routine and hiddev_driver struct below will no
- * longer be useful.
- */
-
-
-/* We never attach in this manner, and rely on HID to connect us. This
- * is why there is no disconnect routine defined in the usb_driver either.
- */
-static int hiddev_usbd_probe(struct usb_interface *intf,
- const struct usb_device_id *hiddev_info)
-{
- return -ENODEV;
-}
-
-static /* const */ struct usb_driver hiddev_driver = {
- .name = "hiddev",
- .probe = hiddev_usbd_probe,
-};
-
-int __init hiddev_init(void)
-{
- return usb_register(&hiddev_driver);
-}
-
-void hiddev_exit(void)
-{
- usb_deregister(&hiddev_driver);
-}
diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig
index 30f06e9..b923074 100644
--- a/drivers/i2c/Kconfig
+++ b/drivers/i2c/Kconfig
@@ -75,7 +75,8 @@
In doubt, say Y.
config I2C_SMBUS
- tristate "SMBus-specific protocols" if !I2C_HELPER_AUTO
+ tristate
+ prompt "SMBus-specific protocols" if !I2C_HELPER_AUTO
help
Say Y here if you want support for SMBus extensions to the I2C
specification. At the moment, the only supported extension is
diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index c00fd66..23ac61e 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile
@@ -9,6 +9,4 @@
obj-$(CONFIG_I2C_MUX) += i2c-mux.o
obj-y += algos/ busses/ muxes/
-ifeq ($(CONFIG_I2C_DEBUG_CORE),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_I2C_DEBUG_CORE) := -DDEBUG
diff --git a/drivers/i2c/algos/Kconfig b/drivers/i2c/algos/Kconfig
index 7b2ce4a..3998dd6 100644
--- a/drivers/i2c/algos/Kconfig
+++ b/drivers/i2c/algos/Kconfig
@@ -15,3 +15,15 @@
tristate "I2C PCA 9564 interfaces"
endmenu
+
+# In automatic configuration mode, we still have to define the
+# symbols to avoid unmet dependencies.
+
+if I2C_HELPER_AUTO
+config I2C_ALGOBIT
+ tristate
+config I2C_ALGOPCF
+ tristate
+config I2C_ALGOPCA
+ tristate
+endif
diff --git a/drivers/i2c/algos/Makefile b/drivers/i2c/algos/Makefile
index 18b3e96..215303f 100644
--- a/drivers/i2c/algos/Makefile
+++ b/drivers/i2c/algos/Makefile
@@ -6,6 +6,4 @@
obj-$(CONFIG_I2C_ALGOPCF) += i2c-algo-pcf.o
obj-$(CONFIG_I2C_ALGOPCA) += i2c-algo-pca.o
-ifeq ($(CONFIG_I2C_DEBUG_ALGO),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_I2C_DEBUG_ALGO) := -DDEBUG
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index c3ef492..033ad41 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -76,6 +76,4 @@
obj-$(CONFIG_SCx200_ACB) += scx200_acb.o
obj-$(CONFIG_SCx200_I2C) += scx200_i2c.o
-ifeq ($(CONFIG_I2C_DEBUG_BUS),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG
diff --git a/drivers/i2c/busses/i2c-amd8111.c b/drivers/i2c/busses/i2c-amd8111.c
index af1e5e2..6b6a6b1 100644
--- a/drivers/i2c/busses/i2c-amd8111.c
+++ b/drivers/i2c/busses/i2c-amd8111.c
@@ -69,7 +69,7 @@
* ACPI 2.0 chapter 13 access of registers of the EC
*/
-static unsigned int amd_ec_wait_write(struct amd_smbus *smbus)
+static int amd_ec_wait_write(struct amd_smbus *smbus)
{
int timeout = 500;
@@ -85,7 +85,7 @@
return 0;
}
-static unsigned int amd_ec_wait_read(struct amd_smbus *smbus)
+static int amd_ec_wait_read(struct amd_smbus *smbus)
{
int timeout = 500;
@@ -101,7 +101,7 @@
return 0;
}
-static unsigned int amd_ec_read(struct amd_smbus *smbus, unsigned char address,
+static int amd_ec_read(struct amd_smbus *smbus, unsigned char address,
unsigned char *data)
{
int status;
@@ -124,7 +124,7 @@
return 0;
}
-static unsigned int amd_ec_write(struct amd_smbus *smbus, unsigned char address,
+static int amd_ec_write(struct amd_smbus *smbus, unsigned char address,
unsigned char data)
{
int status;
@@ -196,7 +196,7 @@
{
struct amd_smbus *smbus = adap->algo_data;
unsigned char protocol, len, pec, temp[2];
- int i;
+ int i, status;
protocol = (read_write == I2C_SMBUS_READ) ? AMD_SMB_PRTCL_READ
: AMD_SMB_PRTCL_WRITE;
@@ -209,38 +209,62 @@
break;
case I2C_SMBUS_BYTE:
- if (read_write == I2C_SMBUS_WRITE)
- amd_ec_write(smbus, AMD_SMB_CMD, command);
+ if (read_write == I2C_SMBUS_WRITE) {
+ status = amd_ec_write(smbus, AMD_SMB_CMD,
+ command);
+ if (status)
+ return status;
+ }
protocol |= AMD_SMB_PRTCL_BYTE;
break;
case I2C_SMBUS_BYTE_DATA:
- amd_ec_write(smbus, AMD_SMB_CMD, command);
- if (read_write == I2C_SMBUS_WRITE)
- amd_ec_write(smbus, AMD_SMB_DATA, data->byte);
+ status = amd_ec_write(smbus, AMD_SMB_CMD, command);
+ if (status)
+ return status;
+ if (read_write == I2C_SMBUS_WRITE) {
+ status = amd_ec_write(smbus, AMD_SMB_DATA,
+ data->byte);
+ if (status)
+ return status;
+ }
protocol |= AMD_SMB_PRTCL_BYTE_DATA;
break;
case I2C_SMBUS_WORD_DATA:
- amd_ec_write(smbus, AMD_SMB_CMD, command);
+ status = amd_ec_write(smbus, AMD_SMB_CMD, command);
+ if (status)
+ return status;
if (read_write == I2C_SMBUS_WRITE) {
- amd_ec_write(smbus, AMD_SMB_DATA,
- data->word & 0xff);
- amd_ec_write(smbus, AMD_SMB_DATA + 1,
- data->word >> 8);
+ status = amd_ec_write(smbus, AMD_SMB_DATA,
+ data->word & 0xff);
+ if (status)
+ return status;
+ status = amd_ec_write(smbus, AMD_SMB_DATA + 1,
+ data->word >> 8);
+ if (status)
+ return status;
}
protocol |= AMD_SMB_PRTCL_WORD_DATA | pec;
break;
case I2C_SMBUS_BLOCK_DATA:
- amd_ec_write(smbus, AMD_SMB_CMD, command);
+ status = amd_ec_write(smbus, AMD_SMB_CMD, command);
+ if (status)
+ return status;
if (read_write == I2C_SMBUS_WRITE) {
len = min_t(u8, data->block[0],
I2C_SMBUS_BLOCK_MAX);
- amd_ec_write(smbus, AMD_SMB_BCNT, len);
- for (i = 0; i < len; i++)
- amd_ec_write(smbus, AMD_SMB_DATA + i,
- data->block[i + 1]);
+ status = amd_ec_write(smbus, AMD_SMB_BCNT, len);
+ if (status)
+ return status;
+ for (i = 0; i < len; i++) {
+ status =
+ amd_ec_write(smbus, AMD_SMB_DATA + i,
+ data->block[i + 1]);
+ if (status)
+ return status;
+ }
}
protocol |= AMD_SMB_PRTCL_BLOCK_DATA | pec;
break;
@@ -248,19 +272,35 @@
case I2C_SMBUS_I2C_BLOCK_DATA:
len = min_t(u8, data->block[0],
I2C_SMBUS_BLOCK_MAX);
- amd_ec_write(smbus, AMD_SMB_CMD, command);
- amd_ec_write(smbus, AMD_SMB_BCNT, len);
+ status = amd_ec_write(smbus, AMD_SMB_CMD, command);
+ if (status)
+ return status;
+ status = amd_ec_write(smbus, AMD_SMB_BCNT, len);
+ if (status)
+ return status;
if (read_write == I2C_SMBUS_WRITE)
- for (i = 0; i < len; i++)
- amd_ec_write(smbus, AMD_SMB_DATA + i,
- data->block[i + 1]);
+ for (i = 0; i < len; i++) {
+ status =
+ amd_ec_write(smbus, AMD_SMB_DATA + i,
+ data->block[i + 1]);
+ if (status)
+ return status;
+ }
protocol |= AMD_SMB_PRTCL_I2C_BLOCK_DATA;
break;
case I2C_SMBUS_PROC_CALL:
- amd_ec_write(smbus, AMD_SMB_CMD, command);
- amd_ec_write(smbus, AMD_SMB_DATA, data->word & 0xff);
- amd_ec_write(smbus, AMD_SMB_DATA + 1, data->word >> 8);
+ status = amd_ec_write(smbus, AMD_SMB_CMD, command);
+ if (status)
+ return status;
+ status = amd_ec_write(smbus, AMD_SMB_DATA,
+ data->word & 0xff);
+ if (status)
+ return status;
+ status = amd_ec_write(smbus, AMD_SMB_DATA + 1,
+ data->word >> 8);
+ if (status)
+ return status;
protocol = AMD_SMB_PRTCL_PROC_CALL | pec;
read_write = I2C_SMBUS_READ;
break;
@@ -268,11 +308,18 @@
case I2C_SMBUS_BLOCK_PROC_CALL:
len = min_t(u8, data->block[0],
I2C_SMBUS_BLOCK_MAX - 1);
- amd_ec_write(smbus, AMD_SMB_CMD, command);
- amd_ec_write(smbus, AMD_SMB_BCNT, len);
- for (i = 0; i < len; i++)
- amd_ec_write(smbus, AMD_SMB_DATA + i,
- data->block[i + 1]);
+ status = amd_ec_write(smbus, AMD_SMB_CMD, command);
+ if (status)
+ return status;
+ status = amd_ec_write(smbus, AMD_SMB_BCNT, len);
+ if (status)
+ return status;
+ for (i = 0; i < len; i++) {
+ status = amd_ec_write(smbus, AMD_SMB_DATA + i,
+ data->block[i + 1]);
+ if (status)
+ return status;
+ }
protocol = AMD_SMB_PRTCL_BLOCK_PROC_CALL | pec;
read_write = I2C_SMBUS_READ;
break;
@@ -282,24 +329,29 @@
return -EOPNOTSUPP;
}
- amd_ec_write(smbus, AMD_SMB_ADDR, addr << 1);
- amd_ec_write(smbus, AMD_SMB_PRTCL, protocol);
+ status = amd_ec_write(smbus, AMD_SMB_ADDR, addr << 1);
+ if (status)
+ return status;
+ status = amd_ec_write(smbus, AMD_SMB_PRTCL, protocol);
+ if (status)
+ return status;
- /* FIXME this discards status from ec_read(); so temp[0] will
- * hold stack garbage ... the rest of this routine will act
- * nonsensically. Ignored ec_write() status might explain
- * some such failures...
- */
- amd_ec_read(smbus, AMD_SMB_STS, temp + 0);
+ status = amd_ec_read(smbus, AMD_SMB_STS, temp + 0);
+ if (status)
+ return status;
if (~temp[0] & AMD_SMB_STS_DONE) {
udelay(500);
- amd_ec_read(smbus, AMD_SMB_STS, temp + 0);
+ status = amd_ec_read(smbus, AMD_SMB_STS, temp + 0);
+ if (status)
+ return status;
}
if (~temp[0] & AMD_SMB_STS_DONE) {
msleep(1);
- amd_ec_read(smbus, AMD_SMB_STS, temp + 0);
+ status = amd_ec_read(smbus, AMD_SMB_STS, temp + 0);
+ if (status)
+ return status;
}
if ((~temp[0] & AMD_SMB_STS_DONE) || (temp[0] & AMD_SMB_STS_STATUS))
@@ -311,24 +363,35 @@
switch (size) {
case I2C_SMBUS_BYTE:
case I2C_SMBUS_BYTE_DATA:
- amd_ec_read(smbus, AMD_SMB_DATA, &data->byte);
+ status = amd_ec_read(smbus, AMD_SMB_DATA, &data->byte);
+ if (status)
+ return status;
break;
case I2C_SMBUS_WORD_DATA:
case I2C_SMBUS_PROC_CALL:
- amd_ec_read(smbus, AMD_SMB_DATA, temp + 0);
- amd_ec_read(smbus, AMD_SMB_DATA + 1, temp + 1);
+ status = amd_ec_read(smbus, AMD_SMB_DATA, temp + 0);
+ if (status)
+ return status;
+ status = amd_ec_read(smbus, AMD_SMB_DATA + 1, temp + 1);
+ if (status)
+ return status;
data->word = (temp[1] << 8) | temp[0];
break;
case I2C_SMBUS_BLOCK_DATA:
case I2C_SMBUS_BLOCK_PROC_CALL:
- amd_ec_read(smbus, AMD_SMB_BCNT, &len);
+ status = amd_ec_read(smbus, AMD_SMB_BCNT, &len);
+ if (status)
+ return status;
len = min_t(u8, len, I2C_SMBUS_BLOCK_MAX);
case I2C_SMBUS_I2C_BLOCK_DATA:
- for (i = 0; i < len; i++)
- amd_ec_read(smbus, AMD_SMB_DATA + i,
- data->block + i + 1);
+ for (i = 0; i < len; i++) {
+ status = amd_ec_read(smbus, AMD_SMB_DATA + i,
+ data->block + i + 1);
+ if (status)
+ return status;
+ }
data->block[0] = len;
break;
}
diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c
index 89eedf4..6e3c382 100644
--- a/drivers/i2c/busses/i2c-ibm_iic.c
+++ b/drivers/i2c/busses/i2c-ibm_iic.c
@@ -41,7 +41,6 @@
#include <asm/irq.h>
#include <linux/io.h>
#include <linux/i2c.h>
-#include <linux/i2c-id.h>
#include <linux/of_platform.h>
#include <linux/of_i2c.h>
diff --git a/drivers/i2c/busses/i2c-nuc900.c b/drivers/i2c/busses/i2c-nuc900.c
index 92d770d..7243426 100644
--- a/drivers/i2c/busses/i2c-nuc900.c
+++ b/drivers/i2c/busses/i2c-nuc900.c
@@ -16,7 +16,6 @@
#include <linux/module.h>
#include <linux/i2c.h>
-#include <linux/i2c-id.h>
#include <linux/init.h>
#include <linux/time.h>
#include <linux/interrupt.h>
diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index 5f6d7f8..ace6799 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -224,7 +224,7 @@
if (irq) {
ret = request_irq(irq, i2c_pca_pf_handler,
- IRQF_TRIGGER_FALLING, i2c->adap.name, i2c);
+ IRQF_TRIGGER_FALLING, pdev->name, i2c);
if (ret)
goto e_reqirq;
}
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index c94e51b..f4c19a9 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -22,7 +22,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/i2c.h>
-#include <linux/i2c-id.h>
#include <linux/init.h>
#include <linux/time.h>
#include <linux/sched.h>
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index bf831bf..6a292ea 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -24,7 +24,6 @@
#include <linux/module.h>
#include <linux/i2c.h>
-#include <linux/i2c-id.h>
#include <linux/init.h>
#include <linux/time.h>
#include <linux/interrupt.h>
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 4c6fff5..0b012f1 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -185,14 +185,8 @@
}
if (temp & 0x04) {
- int read = inb_p(SMBHSTADD) & 0x01;
result = -ENXIO;
- /* The quick and receive byte commands are used to probe
- for chips, so errors are expected, and we don't want
- to frighten the user. */
- if (!((size == VT596_QUICK && !read) ||
- (size == VT596_BYTE && read)))
- dev_err(&vt596_adapter.dev, "Transaction error!\n");
+ dev_dbg(&vt596_adapter.dev, "No response\n");
}
/* Resetting status register */
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index bea4c50..d231f68 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -425,14 +425,14 @@
/* walk up mux tree */
static int i2c_check_mux_parents(struct i2c_adapter *adapter, int addr)
{
+ struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
int result;
result = device_for_each_child(&adapter->dev, &addr,
__i2c_check_addr_busy);
- if (!result && i2c_parent_is_i2c_adapter(adapter))
- result = i2c_check_mux_parents(
- to_i2c_adapter(adapter->dev.parent), addr);
+ if (!result && parent)
+ result = i2c_check_mux_parents(parent, addr);
return result;
}
@@ -453,11 +453,11 @@
static int i2c_check_addr_busy(struct i2c_adapter *adapter, int addr)
{
+ struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
int result = 0;
- if (i2c_parent_is_i2c_adapter(adapter))
- result = i2c_check_mux_parents(
- to_i2c_adapter(adapter->dev.parent), addr);
+ if (parent)
+ result = i2c_check_mux_parents(parent, addr);
if (!result)
result = device_for_each_child(&adapter->dev, &addr,
@@ -472,8 +472,10 @@
*/
void i2c_lock_adapter(struct i2c_adapter *adapter)
{
- if (i2c_parent_is_i2c_adapter(adapter))
- i2c_lock_adapter(to_i2c_adapter(adapter->dev.parent));
+ struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
+
+ if (parent)
+ i2c_lock_adapter(parent);
else
rt_mutex_lock(&adapter->bus_lock);
}
@@ -485,8 +487,10 @@
*/
static int i2c_trylock_adapter(struct i2c_adapter *adapter)
{
- if (i2c_parent_is_i2c_adapter(adapter))
- return i2c_trylock_adapter(to_i2c_adapter(adapter->dev.parent));
+ struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
+
+ if (parent)
+ return i2c_trylock_adapter(parent);
else
return rt_mutex_trylock(&adapter->bus_lock);
}
@@ -497,8 +501,10 @@
*/
void i2c_unlock_adapter(struct i2c_adapter *adapter)
{
- if (i2c_parent_is_i2c_adapter(adapter))
- i2c_unlock_adapter(to_i2c_adapter(adapter->dev.parent));
+ struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
+
+ if (parent)
+ i2c_unlock_adapter(parent);
else
rt_mutex_unlock(&adapter->bus_lock);
}
@@ -677,8 +683,6 @@
char *blank, end;
int res;
- dev_warn(dev, "The new_device interface is still experimental "
- "and may change in a near future\n");
memset(&info, 0, sizeof(struct i2c_board_info));
blank = strchr(buf, ' ');
@@ -1504,26 +1508,25 @@
if (!driver->detect || !address_list)
return 0;
+ /* Stop here if the classes do not match */
+ if (!(adapter->class & driver->class))
+ return 0;
+
/* Set up a temporary client to help detect callback */
temp_client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL);
if (!temp_client)
return -ENOMEM;
temp_client->adapter = adapter;
- /* Stop here if the classes do not match */
- if (!(adapter->class & driver->class))
- goto exit_free;
-
for (i = 0; address_list[i] != I2C_CLIENT_END; i += 1) {
dev_dbg(&adapter->dev, "found normal entry for adapter %d, "
"addr 0x%02x\n", adap_id, address_list[i]);
temp_client->addr = address_list[i];
err = i2c_detect_address(temp_client, driver);
- if (err)
- goto exit_free;
+ if (unlikely(err))
+ break;
}
- exit_free:
kfree(temp_client);
return err;
}
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index 5f3a52d..cec0f3b 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -192,13 +192,12 @@
/* walk up mux tree */
static int i2cdev_check_mux_parents(struct i2c_adapter *adapter, int addr)
{
+ struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
int result;
result = device_for_each_child(&adapter->dev, &addr, i2cdev_check);
-
- if (!result && i2c_parent_is_i2c_adapter(adapter))
- result = i2cdev_check_mux_parents(
- to_i2c_adapter(adapter->dev.parent), addr);
+ if (!result && parent)
+ result = i2cdev_check_mux_parents(parent, addr);
return result;
}
@@ -222,11 +221,11 @@
driver bound to it, as NOT busy. */
static int i2cdev_check_addr(struct i2c_adapter *adapter, unsigned int addr)
{
+ struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
int result = 0;
- if (i2c_parent_is_i2c_adapter(adapter))
- result = i2cdev_check_mux_parents(
- to_i2c_adapter(adapter->dev.parent), addr);
+ if (parent)
+ result = i2cdev_check_mux_parents(parent, addr);
if (!result)
result = device_for_each_child(&adapter->dev, &addr,
diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig
index 4c9a99c..4d91d80 100644
--- a/drivers/i2c/muxes/Kconfig
+++ b/drivers/i2c/muxes/Kconfig
@@ -5,6 +5,16 @@
menu "Multiplexer I2C Chip support"
depends on I2C_MUX
+config I2C_MUX_PCA9541
+ tristate "NXP PCA9541 I2C Master Selector"
+ depends on EXPERIMENTAL
+ help
+ If you say yes here you get support for the NXP PCA9541
+ I2C Master Selector.
+
+ This driver can also be built as a module. If so, the module
+ will be called pca9541.
+
config I2C_MUX_PCA954x
tristate "Philips PCA954x I2C Mux/switches"
depends on EXPERIMENTAL
diff --git a/drivers/i2c/muxes/Makefile b/drivers/i2c/muxes/Makefile
index bd83b527..d743806 100644
--- a/drivers/i2c/muxes/Makefile
+++ b/drivers/i2c/muxes/Makefile
@@ -1,8 +1,7 @@
#
# Makefile for multiplexer I2C chip drivers.
+obj-$(CONFIG_I2C_MUX_PCA9541) += pca9541.o
obj-$(CONFIG_I2C_MUX_PCA954x) += pca954x.o
-ifeq ($(CONFIG_I2C_DEBUG_BUS),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG
diff --git a/drivers/i2c/muxes/pca9541.c b/drivers/i2c/muxes/pca9541.c
new file mode 100644
index 0000000..ed699c5
--- /dev/null
+++ b/drivers/i2c/muxes/pca9541.c
@@ -0,0 +1,411 @@
+/*
+ * I2C multiplexer driver for PCA9541 bus master selector
+ *
+ * Copyright (c) 2010 Ericsson AB.
+ *
+ * Author: Guenter Roeck <guenter.roeck@ericsson.com>
+ *
+ * Derived from:
+ * pca954x.c
+ *
+ * Copyright (c) 2008-2009 Rodolfo Giometti <giometti@linux.it>
+ * Copyright (c) 2008-2009 Eurotech S.p.A. <info@eurotech.it>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/i2c-mux.h>
+
+#include <linux/i2c/pca954x.h>
+
+/*
+ * The PCA9541 is a bus master selector. It supports two I2C masters connected
+ * to a single slave bus.
+ *
+ * Before each bus transaction, a master has to acquire bus ownership. After the
+ * transaction is complete, bus ownership has to be released. This fits well
+ * into the I2C multiplexer framework, which provides select and release
+ * functions for this purpose. For this reason, this driver is modeled as
+ * single-channel I2C bus multiplexer.
+ *
+ * This driver assumes that the two bus masters are controlled by two different
+ * hosts. If a single host controls both masters, platform code has to ensure
+ * that only one of the masters is instantiated at any given time.
+ */
+
+#define PCA9541_CONTROL 0x01
+#define PCA9541_ISTAT 0x02
+
+#define PCA9541_CTL_MYBUS (1 << 0)
+#define PCA9541_CTL_NMYBUS (1 << 1)
+#define PCA9541_CTL_BUSON (1 << 2)
+#define PCA9541_CTL_NBUSON (1 << 3)
+#define PCA9541_CTL_BUSINIT (1 << 4)
+#define PCA9541_CTL_TESTON (1 << 6)
+#define PCA9541_CTL_NTESTON (1 << 7)
+
+#define PCA9541_ISTAT_INTIN (1 << 0)
+#define PCA9541_ISTAT_BUSINIT (1 << 1)
+#define PCA9541_ISTAT_BUSOK (1 << 2)
+#define PCA9541_ISTAT_BUSLOST (1 << 3)
+#define PCA9541_ISTAT_MYTEST (1 << 6)
+#define PCA9541_ISTAT_NMYTEST (1 << 7)
+
+#define BUSON (PCA9541_CTL_BUSON | PCA9541_CTL_NBUSON)
+#define MYBUS (PCA9541_CTL_MYBUS | PCA9541_CTL_NMYBUS)
+#define mybus(x) (!((x) & MYBUS) || ((x) & MYBUS) == MYBUS)
+#define busoff(x) (!((x) & BUSON) || ((x) & BUSON) == BUSON)
+
+/* arbitration timeouts, in jiffies */
+#define ARB_TIMEOUT (HZ / 8) /* 125 ms until forcing bus ownership */
+#define ARB2_TIMEOUT (HZ / 4) /* 250 ms until acquisition failure */
+
+/* arbitration retry delays, in us */
+#define SELECT_DELAY_SHORT 50
+#define SELECT_DELAY_LONG 1000
+
+struct pca9541 {
+ struct i2c_adapter *mux_adap;
+ unsigned long select_timeout;
+ unsigned long arb_timeout;
+};
+
+static const struct i2c_device_id pca9541_id[] = {
+ {"pca9541", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(i2c, pca9541_id);
+
+/*
+ * Write to chip register. Don't use i2c_transfer()/i2c_smbus_xfer()
+ * as they will try to lock the adapter a second time.
+ */
+static int pca9541_reg_write(struct i2c_client *client, u8 command, u8 val)
+{
+ struct i2c_adapter *adap = client->adapter;
+ int ret;
+
+ if (adap->algo->master_xfer) {
+ struct i2c_msg msg;
+ char buf[2];
+
+ msg.addr = client->addr;
+ msg.flags = 0;
+ msg.len = 2;
+ buf[0] = command;
+ buf[1] = val;
+ msg.buf = buf;
+ ret = adap->algo->master_xfer(adap, &msg, 1);
+ } else {
+ union i2c_smbus_data data;
+
+ data.byte = val;
+ ret = adap->algo->smbus_xfer(adap, client->addr,
+ client->flags,
+ I2C_SMBUS_WRITE,
+ command,
+ I2C_SMBUS_BYTE_DATA, &data);
+ }
+
+ return ret;
+}
+
+/*
+ * Read from chip register. Don't use i2c_transfer()/i2c_smbus_xfer()
+ * as they will try to lock adapter a second time.
+ */
+static int pca9541_reg_read(struct i2c_client *client, u8 command)
+{
+ struct i2c_adapter *adap = client->adapter;
+ int ret;
+ u8 val;
+
+ if (adap->algo->master_xfer) {
+ struct i2c_msg msg[2] = {
+ {
+ .addr = client->addr,
+ .flags = 0,
+ .len = 1,
+ .buf = &command
+ },
+ {
+ .addr = client->addr,
+ .flags = I2C_M_RD,
+ .len = 1,
+ .buf = &val
+ }
+ };
+ ret = adap->algo->master_xfer(adap, msg, 2);
+ if (ret == 2)
+ ret = val;
+ else if (ret >= 0)
+ ret = -EIO;
+ } else {
+ union i2c_smbus_data data;
+
+ ret = adap->algo->smbus_xfer(adap, client->addr,
+ client->flags,
+ I2C_SMBUS_READ,
+ command,
+ I2C_SMBUS_BYTE_DATA, &data);
+ if (!ret)
+ ret = data.byte;
+ }
+ return ret;
+}
+
+/*
+ * Arbitration management functions
+ */
+
+/* Release bus. Also reset NTESTON and BUSINIT if it was set. */
+static void pca9541_release_bus(struct i2c_client *client)
+{
+ int reg;
+
+ reg = pca9541_reg_read(client, PCA9541_CONTROL);
+ if (reg >= 0 && !busoff(reg) && mybus(reg))
+ pca9541_reg_write(client, PCA9541_CONTROL,
+ (reg & PCA9541_CTL_NBUSON) >> 1);
+}
+
+/*
+ * Arbitration is defined as a two-step process. A bus master can only activate
+ * the slave bus if it owns it; otherwise it has to request ownership first.
+ * This multi-step process ensures that access contention is resolved
+ * gracefully.
+ *
+ * Bus Ownership Other master Action
+ * state requested access
+ * ----------------------------------------------------
+ * off - yes wait for arbitration timeout or
+ * for other master to drop request
+ * off no no take ownership
+ * off yes no turn on bus
+ * on yes - done
+ * on no - wait for arbitration timeout or
+ * for other master to release bus
+ *
+ * The main contention point occurs if the slave bus is off and both masters
+ * request ownership at the same time. In this case, one master will turn on
+ * the slave bus, believing that it owns it. The other master will request
+ * bus ownership. Result is that the bus is turned on, and master which did
+ * _not_ own the slave bus before ends up owning it.
+ */
+
+/* Control commands per PCA9541 datasheet */
+static const u8 pca9541_control[16] = {
+ 4, 0, 1, 5, 4, 4, 5, 5, 0, 0, 1, 1, 0, 4, 5, 1
+};
+
+/*
+ * Channel arbitration
+ *
+ * Return values:
+ * <0: error
+ * 0 : bus not acquired
+ * 1 : bus acquired
+ */
+static int pca9541_arbitrate(struct i2c_client *client)
+{
+ struct pca9541 *data = i2c_get_clientdata(client);
+ int reg;
+
+ reg = pca9541_reg_read(client, PCA9541_CONTROL);
+ if (reg < 0)
+ return reg;
+
+ if (busoff(reg)) {
+ int istat;
+ /*
+ * Bus is off. Request ownership or turn it on unless
+ * other master requested ownership.
+ */
+ istat = pca9541_reg_read(client, PCA9541_ISTAT);
+ if (!(istat & PCA9541_ISTAT_NMYTEST)
+ || time_is_before_eq_jiffies(data->arb_timeout)) {
+ /*
+ * Other master did not request ownership,
+ * or arbitration timeout expired. Take the bus.
+ */
+ pca9541_reg_write(client,
+ PCA9541_CONTROL,
+ pca9541_control[reg & 0x0f]
+ | PCA9541_CTL_NTESTON);
+ data->select_timeout = SELECT_DELAY_SHORT;
+ } else {
+ /*
+ * Other master requested ownership.
+ * Set extra long timeout to give it time to acquire it.
+ */
+ data->select_timeout = SELECT_DELAY_LONG * 2;
+ }
+ } else if (mybus(reg)) {
+ /*
+ * Bus is on, and we own it. We are done with acquisition.
+ * Reset NTESTON and BUSINIT, then return success.
+ */
+ if (reg & (PCA9541_CTL_NTESTON | PCA9541_CTL_BUSINIT))
+ pca9541_reg_write(client,
+ PCA9541_CONTROL,
+ reg & ~(PCA9541_CTL_NTESTON
+ | PCA9541_CTL_BUSINIT));
+ return 1;
+ } else {
+ /*
+ * Other master owns the bus.
+ * If arbitration timeout has expired, force ownership.
+ * Otherwise request it.
+ */
+ data->select_timeout = SELECT_DELAY_LONG;
+ if (time_is_before_eq_jiffies(data->arb_timeout)) {
+ /* Time is up, take the bus and reset it. */
+ pca9541_reg_write(client,
+ PCA9541_CONTROL,
+ pca9541_control[reg & 0x0f]
+ | PCA9541_CTL_BUSINIT
+ | PCA9541_CTL_NTESTON);
+ } else {
+ /* Request bus ownership if needed */
+ if (!(reg & PCA9541_CTL_NTESTON))
+ pca9541_reg_write(client,
+ PCA9541_CONTROL,
+ reg | PCA9541_CTL_NTESTON);
+ }
+ }
+ return 0;
+}
+
+static int pca9541_select_chan(struct i2c_adapter *adap, void *client, u32 chan)
+{
+ struct pca9541 *data = i2c_get_clientdata(client);
+ int ret;
+ unsigned long timeout = jiffies + ARB2_TIMEOUT;
+ /* give up after this time */
+
+ data->arb_timeout = jiffies + ARB_TIMEOUT;
+ /* force bus ownership after this time */
+
+ do {
+ ret = pca9541_arbitrate(client);
+ if (ret)
+ return ret < 0 ? ret : 0;
+
+ if (data->select_timeout == SELECT_DELAY_SHORT)
+ udelay(data->select_timeout);
+ else
+ msleep(data->select_timeout / 1000);
+ } while (time_is_after_eq_jiffies(timeout));
+
+ return -ETIMEDOUT;
+}
+
+static int pca9541_release_chan(struct i2c_adapter *adap,
+ void *client, u32 chan)
+{
+ pca9541_release_bus(client);
+ return 0;
+}
+
+/*
+ * I2C init/probing/exit functions
+ */
+static int pca9541_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct i2c_adapter *adap = client->adapter;
+ struct pca954x_platform_data *pdata = client->dev.platform_data;
+ struct pca9541 *data;
+ int force;
+ int ret = -ENODEV;
+
+ if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_BYTE_DATA))
+ goto err;
+
+ data = kzalloc(sizeof(struct pca9541), GFP_KERNEL);
+ if (!data) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ i2c_set_clientdata(client, data);
+
+ /*
+ * I2C accesses are unprotected here.
+ * We have to lock the adapter before releasing the bus.
+ */
+ i2c_lock_adapter(adap);
+ pca9541_release_bus(client);
+ i2c_unlock_adapter(adap);
+
+ /* Create mux adapter */
+
+ force = 0;
+ if (pdata)
+ force = pdata->modes[0].adap_id;
+ data->mux_adap = i2c_add_mux_adapter(adap, client, force, 0,
+ pca9541_select_chan,
+ pca9541_release_chan);
+
+ if (data->mux_adap == NULL) {
+ dev_err(&client->dev, "failed to register master selector\n");
+ goto exit_free;
+ }
+
+ dev_info(&client->dev, "registered master selector for I2C %s\n",
+ client->name);
+
+ return 0;
+
+exit_free:
+ kfree(data);
+err:
+ return ret;
+}
+
+static int pca9541_remove(struct i2c_client *client)
+{
+ struct pca9541 *data = i2c_get_clientdata(client);
+
+ i2c_del_mux_adapter(data->mux_adap);
+
+ kfree(data);
+ return 0;
+}
+
+static struct i2c_driver pca9541_driver = {
+ .driver = {
+ .name = "pca9541",
+ .owner = THIS_MODULE,
+ },
+ .probe = pca9541_probe,
+ .remove = pca9541_remove,
+ .id_table = pca9541_id,
+};
+
+static int __init pca9541_init(void)
+{
+ return i2c_add_driver(&pca9541_driver);
+}
+
+static void __exit pca9541_exit(void)
+{
+ i2c_del_driver(&pca9541_driver);
+}
+
+module_init(pca9541_init);
+module_exit(pca9541_exit);
+
+MODULE_AUTHOR("Guenter Roeck <guenter.roeck@ericsson.com>");
+MODULE_DESCRIPTION("PCA9541 I2C master selector driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/i2c/muxes/pca954x.c b/drivers/i2c/muxes/pca954x.c
index 6f9accf..54e1ce7 100644
--- a/drivers/i2c/muxes/pca954x.c
+++ b/drivers/i2c/muxes/pca954x.c
@@ -181,8 +181,8 @@
/*
* I2C init/probing/exit functions
*/
-static int __devinit pca954x_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int pca954x_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
{
struct i2c_adapter *adap = to_i2c_adapter(client->dev.parent);
struct pca954x_platform_data *pdata = client->dev.platform_data;
@@ -255,7 +255,7 @@
return ret;
}
-static int __devexit pca954x_remove(struct i2c_client *client)
+static int pca954x_remove(struct i2c_client *client)
{
struct pca954x *data = i2c_get_clientdata(client);
const struct chip_desc *chip = &chips[data->type];
@@ -279,7 +279,7 @@
.owner = THIS_MODULE,
},
.probe = pca954x_probe,
- .remove = __devexit_p(pca954x_remove),
+ .remove = pca954x_remove,
.id_table = pca954x_id,
};
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 4e298bc..5a46b8c 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -32,6 +32,7 @@
* The pointer to our (page) of device descriptions.
*/
static void *kvm_devices;
+struct work_struct hotplug_work;
struct kvm_device {
struct virtio_device vdev;
@@ -328,13 +329,54 @@
}
/*
+ * match for a kvm device with a specific desc pointer
+ */
+static int match_desc(struct device *dev, void *data)
+{
+ if ((ulong)to_kvmdev(dev_to_virtio(dev))->desc == (ulong)data)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * hotplug_device tries to find changes in the device page.
+ */
+static void hotplug_devices(struct work_struct *dummy)
+{
+ unsigned int i;
+ struct kvm_device_desc *d;
+ struct device *dev;
+
+ for (i = 0; i < PAGE_SIZE; i += desc_size(d)) {
+ d = kvm_devices + i;
+
+ /* end of list */
+ if (d->type == 0)
+ break;
+
+ /* device already exists */
+ dev = device_find_child(kvm_root, d, match_desc);
+ if (dev) {
+ /* XXX check for hotplug remove */
+ put_device(dev);
+ continue;
+ }
+
+ /* new device */
+ printk(KERN_INFO "Adding new virtio device %p\n", d);
+ add_kvm_device(d, i);
+ }
+}
+
+/*
* we emulate the request_irq behaviour on top of s390 extints
*/
static void kvm_extint_handler(u16 code)
{
struct virtqueue *vq;
u16 subcode;
- int config_changed;
+ u32 param;
subcode = S390_lowcore.cpu_addr;
if ((subcode & 0xff00) != VIRTIO_SUBCODE_64)
@@ -343,18 +385,28 @@
/* The LSB might be overloaded, we have to mask it */
vq = (struct virtqueue *)(S390_lowcore.ext_params2 & ~1UL);
- /* We use the LSB of extparam, to decide, if this interrupt is a config
- * change or a "standard" interrupt */
- config_changed = S390_lowcore.ext_params & 1;
+ /* We use ext_params to decide what this interrupt means */
+ param = S390_lowcore.ext_params & VIRTIO_PARAM_MASK;
- if (config_changed) {
+ switch (param) {
+ case VIRTIO_PARAM_CONFIG_CHANGED:
+ {
struct virtio_driver *drv;
drv = container_of(vq->vdev->dev.driver,
struct virtio_driver, driver);
if (drv->config_changed)
drv->config_changed(vq->vdev);
- } else
+
+ break;
+ }
+ case VIRTIO_PARAM_DEV_ADD:
+ schedule_work(&hotplug_work);
+ break;
+ case VIRTIO_PARAM_VRING_INTERRUPT:
+ default:
vring_interrupt(0, vq);
+ break;
+ }
}
/*
@@ -383,6 +435,8 @@
kvm_devices = (void *) real_memory_size;
+ INIT_WORK(&hotplug_work, hotplug_devices);
+
ctl_set_bit(0, 9);
register_external_interrupt(0x2603, kvm_extint_handler);
diff --git a/drivers/video/aty/radeon_i2c.c b/drivers/video/aty/radeon_i2c.c
index 359fc64..78d1f4c 100644
--- a/drivers/video/aty/radeon_i2c.c
+++ b/drivers/video/aty/radeon_i2c.c
@@ -7,7 +7,6 @@
#include <linux/i2c.h>
-#include <linux/i2c-id.h>
#include <linux/i2c-algo-bit.h>
#include <asm/io.h>
diff --git a/drivers/video/i810/i810.h b/drivers/video/i810/i810.h
index 328ae6c..f37de60e 100644
--- a/drivers/video/i810/i810.h
+++ b/drivers/video/i810/i810.h
@@ -17,7 +17,6 @@
#include <linux/agp_backend.h>
#include <linux/fb.h>
#include <linux/i2c.h>
-#include <linux/i2c-id.h>
#include <linux/i2c-algo-bit.h>
#include <video/vga.h>
diff --git a/drivers/video/intelfb/intelfb_i2c.c b/drivers/video/intelfb/intelfb_i2c.c
index 487f2be..3300bd3 100644
--- a/drivers/video/intelfb/intelfb_i2c.c
+++ b/drivers/video/intelfb/intelfb_i2c.c
@@ -32,7 +32,6 @@
#include <linux/fb.h>
#include <linux/i2c.h>
-#include <linux/i2c-id.h>
#include <linux/i2c-algo-bit.h>
#include <asm/io.h>
diff --git a/drivers/video/savage/savagefb.h b/drivers/video/savage/savagefb.h
index 8bfdfc3..e4c3f21 100644
--- a/drivers/video/savage/savagefb.h
+++ b/drivers/video/savage/savagefb.h
@@ -13,7 +13,6 @@
#define __SAVAGEFB_H__
#include <linux/i2c.h>
-#include <linux/i2c-id.h>
#include <linux/i2c-algo-bit.h>
#include <linux/mutex.h>
#include <video/vga.h>
diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h
index 1c96b25..ba98918 100644
--- a/include/crypto/cryptd.h
+++ b/include/crypto/cryptd.h
@@ -1,5 +1,12 @@
/*
* Software async crypto daemon
+ *
+ * Added AEAD support to cryptd.
+ * Authors: Tadeusz Struk (tadeusz.struk@intel.com)
+ * Adrian Hoban <adrian.hoban@intel.com>
+ * Gabriele Paoloni <gabriele.paoloni@intel.com>
+ * Aidan O'Mahony (aidan.o.mahony@intel.com)
+ * Copyright (c) 2010, Intel Corporation.
*/
#ifndef _CRYPTO_CRYPT_H
@@ -42,4 +49,21 @@
struct shash_desc *cryptd_shash_desc(struct ahash_request *req);
void cryptd_free_ahash(struct cryptd_ahash *tfm);
+struct cryptd_aead {
+ struct crypto_aead base;
+};
+
+static inline struct cryptd_aead *__cryptd_aead_cast(
+ struct crypto_aead *tfm)
+{
+ return (struct cryptd_aead *)tfm;
+}
+
+struct cryptd_aead *cryptd_alloc_aead(const char *alg_name,
+ u32 type, u32 mask);
+
+struct crypto_aead *cryptd_aead_child(struct cryptd_aead *tfm);
+
+void cryptd_free_aead(struct cryptd_aead *tfm);
+
#endif
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 42a0f1d..bb0f56f 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -316,6 +316,7 @@
#define HID_QUIRK_FULLSPEED_INTERVAL 0x10000000
#define HID_QUIRK_NO_INIT_REPORTS 0x20000000
#define HID_QUIRK_NO_IGNORE 0x40000000
+#define HID_QUIRK_NO_INPUT_SYNC 0x80000000
/*
* This is the global environment of the parser. This information is
@@ -626,8 +627,8 @@
int (*event)(struct hid_device *hdev, struct hid_field *field,
struct hid_usage *usage, __s32 value);
- void (*report_fixup)(struct hid_device *hdev, __u8 *buf,
- unsigned int size);
+ __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf,
+ unsigned int *size);
int (*input_mapping)(struct hid_device *hdev,
struct hid_input *hidinput, struct hid_field *field,
diff --git a/include/linux/hiddev.h b/include/linux/hiddev.h
index bb6f58b..a3f481a 100644
--- a/include/linux/hiddev.h
+++ b/include/linux/hiddev.h
@@ -226,8 +226,6 @@
void hiddev_hid_event(struct hid_device *hid, struct hid_field *field,
struct hid_usage *usage, __s32 value);
void hiddev_report_event(struct hid_device *hid, struct hid_report *report);
-int __init hiddev_init(void);
-void hiddev_exit(void);
#else
static inline int hiddev_connect(struct hid_device *hid,
unsigned int force)
@@ -236,8 +234,6 @@
static inline void hiddev_hid_event(struct hid_device *hid, struct hid_field *field,
struct hid_usage *usage, __s32 value) { }
static inline void hiddev_report_event(struct hid_device *hid, struct hid_report *report) { }
-static inline int hiddev_init(void) { return 0; }
-static inline void hiddev_exit(void) { }
#endif
#endif
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 4bae0b7..1f66fa0 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -384,11 +384,15 @@
dev_set_drvdata(&dev->dev, data);
}
-static inline int i2c_parent_is_i2c_adapter(const struct i2c_adapter *adapter)
+static inline struct i2c_adapter *
+i2c_parent_is_i2c_adapter(const struct i2c_adapter *adapter)
{
- return adapter->dev.parent != NULL
- && adapter->dev.parent->bus == &i2c_bus_type
- && adapter->dev.parent->type == &i2c_adapter_type;
+ struct device *parent = adapter->dev.parent;
+
+ if (parent != NULL && parent->type == &i2c_adapter_type)
+ return to_i2c_adapter(parent);
+ else
+ return NULL;
}
/* Adapter locking functions, exported for shared pin cases */
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 636fc38..919ae53 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -414,6 +414,14 @@
__u8 pad[64];
};
+/* for KVM_PPC_GET_PVINFO */
+struct kvm_ppc_pvinfo {
+ /* out */
+ __u32 flags;
+ __u32 hcall[4];
+ __u8 pad[108];
+};
+
#define KVMIO 0xAE
/*
@@ -530,6 +538,8 @@
#ifdef __KVM_HAVE_XCRS
#define KVM_CAP_XCRS 56
#endif
+#define KVM_CAP_PPC_GET_PVINFO 57
+#define KVM_CAP_PPC_IRQ_LEVEL 58
#ifdef KVM_CAP_IRQ_ROUTING
@@ -664,6 +674,8 @@
/* Available with KVM_CAP_PIT_STATE2 */
#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
+/* Available with KVM_CAP_PPC_GET_PVINFO */
+#define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo)
/*
* ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ac740b2..a055742 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -36,9 +36,10 @@
#define KVM_REQ_PENDING_TIMER 5
#define KVM_REQ_UNHALT 6
#define KVM_REQ_MMU_SYNC 7
-#define KVM_REQ_KVMCLOCK_UPDATE 8
+#define KVM_REQ_CLOCK_UPDATE 8
#define KVM_REQ_KICK 9
#define KVM_REQ_DEACTIVATE_FPU 10
+#define KVM_REQ_EVENT 11
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
@@ -289,6 +290,9 @@
void kvm_disable_largepages(void);
void kvm_arch_flush_shadow(struct kvm *kvm);
+int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
+ int nr_pages);
+
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
void kvm_release_page_clean(struct page *page);
@@ -296,6 +300,8 @@
void kvm_set_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);
+pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
+pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
@@ -477,8 +483,7 @@
struct kvm_assigned_dev_kernel *assigned_dev);
#else /* CONFIG_IOMMU_API */
static inline int kvm_iommu_map_pages(struct kvm *kvm,
- gfn_t base_gfn,
- unsigned long npages)
+ struct kvm_memory_slot *slot)
{
return 0;
}
@@ -518,11 +523,22 @@
current->flags &= ~PF_VCPU;
}
+static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
+ gfn_t gfn)
+{
+ return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
+}
+
static inline gpa_t gfn_to_gpa(gfn_t gfn)
{
return (gpa_t)gfn << PAGE_SHIFT;
}
+static inline gfn_t gpa_to_gfn(gpa_t gpa)
+{
+ return (gfn_t)(gpa >> PAGE_SHIFT);
+}
+
static inline hpa_t pfn_to_hpa(pfn_t pfn)
{
return (hpa_t)pfn << PAGE_SHIFT;
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
index d731092..47a070b 100644
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -17,6 +17,8 @@
#define KVM_HC_VAPIC_POLL_IRQ 1
#define KVM_HC_MMU_OP 2
+#define KVM_HC_FEATURES 3
+#define KVM_HC_PPC_MAP_MAGIC_PAGE 4
/*
* hypercalls use architecture specific
@@ -24,11 +26,6 @@
#include <asm/kvm_para.h>
#ifdef __KERNEL__
-#ifdef CONFIG_KVM_GUEST
-void __init kvm_guest_init(void);
-#else
-#define kvm_guest_init() do { } while (0)
-#endif
static inline int kvm_para_has_feature(unsigned int feature)
{
diff --git a/include/linux/padata.h b/include/linux/padata.h
index bdcd1e9..4633b2f 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -127,8 +127,8 @@
*/
struct parallel_data {
struct padata_instance *pinst;
- struct padata_parallel_queue *pqueue;
- struct padata_serial_queue *squeue;
+ struct padata_parallel_queue __percpu *pqueue;
+ struct padata_serial_queue __percpu *squeue;
atomic_t seq_nr;
atomic_t reorder_objects;
atomic_t refcnt;
diff --git a/mm/util.c b/mm/util.c
index 4735ea4..73dac81 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -245,6 +245,19 @@
}
#endif
+/*
+ * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ * If the architecture not support this fucntion, simply return with no
+ * page pinned
+ */
+int __attribute__((weak)) __get_user_pages_fast(unsigned long start,
+ int nr_pages, int write, struct page **pages)
+{
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__get_user_pages_fast);
+
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 369e380..8edca91 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -17,7 +17,7 @@
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
*
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*/
#include <linux/kvm_host.h>
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 60e5e46..5225052 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5,7 +5,7 @@
* machines without emulation or binary translation.
*
* Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Avi Kivity <avi@qumranet.com>
@@ -705,14 +705,12 @@
if (r)
goto out_free;
-#ifdef CONFIG_DMAR
/* map the pages in iommu page table */
if (npages) {
r = kvm_iommu_map_pages(kvm, &new);
if (r)
goto out_free;
}
-#endif
r = -ENOMEM;
slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
@@ -927,35 +925,46 @@
return memslot - slots->memslots;
}
-static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
-{
- return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
-}
-
-unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
+static unsigned long gfn_to_hva_many(struct kvm *kvm, gfn_t gfn,
+ gfn_t *nr_pages)
{
struct kvm_memory_slot *slot;
slot = gfn_to_memslot(kvm, gfn);
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
return bad_hva();
+
+ if (nr_pages)
+ *nr_pages = slot->npages - (gfn - slot->base_gfn);
+
return gfn_to_hva_memslot(slot, gfn);
}
+
+unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
+{
+ return gfn_to_hva_many(kvm, gfn, NULL);
+}
EXPORT_SYMBOL_GPL(gfn_to_hva);
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
{
struct page *page[1];
int npages;
pfn_t pfn;
- might_sleep();
-
- npages = get_user_pages_fast(addr, 1, 1, page);
+ if (atomic)
+ npages = __get_user_pages_fast(addr, 1, 1, page);
+ else {
+ might_sleep();
+ npages = get_user_pages_fast(addr, 1, 1, page);
+ }
if (unlikely(npages != 1)) {
struct vm_area_struct *vma;
+ if (atomic)
+ goto return_fault_page;
+
down_read(¤t->mm->mmap_sem);
if (is_hwpoison_address(addr)) {
up_read(¤t->mm->mmap_sem);
@@ -968,6 +977,7 @@
if (vma == NULL || addr < vma->vm_start ||
!(vma->vm_flags & VM_PFNMAP)) {
up_read(¤t->mm->mmap_sem);
+return_fault_page:
get_page(fault_page);
return page_to_pfn(fault_page);
}
@@ -981,7 +991,13 @@
return pfn;
}
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+{
+ return hva_to_pfn(kvm, addr, true);
+}
+EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
+
+static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic)
{
unsigned long addr;
@@ -991,7 +1007,18 @@
return page_to_pfn(bad_page);
}
- return hva_to_pfn(kvm, addr);
+ return hva_to_pfn(kvm, addr, atomic);
+}
+
+pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
+{
+ return __gfn_to_pfn(kvm, gfn, true);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
+
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+{
+ return __gfn_to_pfn(kvm, gfn, false);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn);
@@ -999,9 +1026,26 @@
struct kvm_memory_slot *slot, gfn_t gfn)
{
unsigned long addr = gfn_to_hva_memslot(slot, gfn);
- return hva_to_pfn(kvm, addr);
+ return hva_to_pfn(kvm, addr, false);
}
+int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
+ int nr_pages)
+{
+ unsigned long addr;
+ gfn_t entry;
+
+ addr = gfn_to_hva_many(kvm, gfn, &entry);
+ if (kvm_is_error_hva(addr))
+ return -1;
+
+ if (entry < nr_pages)
+ return 0;
+
+ return __get_user_pages_fast(addr, nr_pages, 1, pages);
+}
+EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
+
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
pfn_t pfn;
@@ -1964,7 +2008,9 @@
case CPU_STARTING:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
cpu);
+ spin_lock(&kvm_lock);
hardware_enable(NULL);
+ spin_unlock(&kvm_lock);
break;
}
return NOTIFY_OK;
@@ -1977,7 +2023,7 @@
/* spin while reset goes on */
local_irq_enable();
while (true)
- ;
+ cpu_relax();
}
/* Fault while not rebooting. We want the trace. */
BUG();
@@ -2171,8 +2217,10 @@
static int kvm_resume(struct sys_device *dev)
{
- if (kvm_usage_count)
+ if (kvm_usage_count) {
+ WARN_ON(spin_is_locked(&kvm_lock));
hardware_enable(NULL);
+ }
return 0;
}