summaryrefslogtreecommitdiff
blob: 096656ad62436292b4145ed226fd65aa9e71948a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
From a603386b422f5cb4c5e2639a7e20a1d99dba2175 Mon Sep 17 00:00:00 2001
From: Julien Grall <jgrall@amazon.com>
Date: Tue, 11 Oct 2022 14:54:44 +0200
Subject: [PATCH 11/87] xen/x86: p2m: Add preemption in p2m_teardown()

The list p2m->pages contain all the pages used by the P2M. On large
instance this can be quite large and the time spent to call
d->arch.paging.free_page() will take more than 1ms for a 80GB guest
on a Xen running in nested environment on a c5.metal.

By extrapolation, it would take > 100ms for a 8TB guest (what we
current security support). So add some preemption in p2m_teardown()
and propagate to the callers. Note there are 3 places where
the preemption is not enabled:
    - hap_final_teardown()/shadow_final_teardown(): We are
      preventing update the P2M once the domain is dying (so
      no more pages could be allocated) and most of the P2M pages
      will be freed in preemptive manneer when relinquishing the
      resources. So this is fine to disable preemption.
    - shadow_enable(): This is fine because it will undo the allocation
      that may have been made by p2m_alloc_table() (so only the root
      page table).

The preemption is arbitrarily checked every 1024 iterations.

We now need to include <xen/event.h> in p2m-basic in order to
import the definition for local_events_need_delivery() used by
general_preempt_check(). Ideally, the inclusion should happen in
xen/sched.h but it opened a can of worms.

Note that with the current approach, Xen doesn't keep track on whether
the alt/nested P2Ms have been cleared. So there are some redundant work.
However, this is not expected to incurr too much overhead (the P2M lock
shouldn't be contended during teardown). So this is optimization is
left outside of the security event.

This is part of CVE-2022-33746 / XSA-410.

Signed-off-by: Julien Grall <jgrall@amazon.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
master commit: 8a2111250b424edc49c65c4d41b276766d30635c
master date: 2022-10-11 14:24:48 +0200
---
 xen/arch/x86/mm/hap/hap.c       | 22 ++++++++++++++++------
 xen/arch/x86/mm/p2m.c           | 18 +++++++++++++++---
 xen/arch/x86/mm/shadow/common.c | 12 +++++++++---
 xen/include/asm-x86/p2m.h       |  2 +-
 4 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index a44fcfd95e1e..1f9a157a0c34 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -548,17 +548,17 @@ void hap_final_teardown(struct domain *d)
 
     if ( hvm_altp2m_supported() )
         for ( i = 0; i < MAX_ALTP2M; i++ )
-            p2m_teardown(d->arch.altp2m_p2m[i], true);
+            p2m_teardown(d->arch.altp2m_p2m[i], true, NULL);
 
     /* Destroy nestedp2m's first */
     for (i = 0; i < MAX_NESTEDP2M; i++) {
-        p2m_teardown(d->arch.nested_p2m[i], true);
+        p2m_teardown(d->arch.nested_p2m[i], true, NULL);
     }
 
     if ( d->arch.paging.hap.total_pages != 0 )
         hap_teardown(d, NULL);
 
-    p2m_teardown(p2m_get_hostp2m(d), true);
+    p2m_teardown(p2m_get_hostp2m(d), true, NULL);
     /* Free any memory that the p2m teardown released */
     paging_lock(d);
     hap_set_allocation(d, 0, NULL);
@@ -612,14 +612,24 @@ void hap_teardown(struct domain *d, bool *preempted)
         FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp);
 
         for ( i = 0; i < MAX_ALTP2M; i++ )
-            p2m_teardown(d->arch.altp2m_p2m[i], false);
+        {
+            p2m_teardown(d->arch.altp2m_p2m[i], false, preempted);
+            if ( preempted && *preempted )
+                return;
+        }
     }
 
     /* Destroy nestedp2m's after altp2m. */
     for ( i = 0; i < MAX_NESTEDP2M; i++ )
-        p2m_teardown(d->arch.nested_p2m[i], false);
+    {
+        p2m_teardown(d->arch.nested_p2m[i], false, preempted);
+        if ( preempted && *preempted )
+            return;
+    }
 
-    p2m_teardown(p2m_get_hostp2m(d), false);
+    p2m_teardown(p2m_get_hostp2m(d), false, preempted);
+    if ( preempted && *preempted )
+        return;
 
     paging_lock(d); /* Keep various asserts happy */
 
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index aba4f17cbe12..8781df9dda8d 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -749,12 +749,13 @@ int p2m_alloc_table(struct p2m_domain *p2m)
  * hvm fixme: when adding support for pvh non-hardware domains, this path must
  * cleanup any foreign p2m types (release refcnts on them).
  */
-void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
+void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted)
 /* Return all the p2m pages to Xen.
  * We know we don't have any extra mappings to these pages */
 {
     struct page_info *pg, *root_pg = NULL;
     struct domain *d;
+    unsigned int i = 0;
 
     if (p2m == NULL)
         return;
@@ -773,8 +774,19 @@ void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
     }
 
     while ( (pg = page_list_remove_head(&p2m->pages)) )
-        if ( pg != root_pg )
-            d->arch.paging.free_page(d, pg);
+    {
+        if ( pg == root_pg )
+            continue;
+
+        d->arch.paging.free_page(d, pg);
+
+        /* Arbitrarily check preemption every 1024 iterations */
+        if ( preempted && !(++i % 1024) && general_preempt_check() )
+        {
+            *preempted = true;
+            break;
+        }
+    }
 
     if ( root_pg )
         page_list_add(root_pg, &p2m->pages);
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index ac9a1ae07808..3b0d781991b5 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -2770,8 +2770,12 @@ int shadow_enable(struct domain *d, u32 mode)
  out_locked:
     paging_unlock(d);
  out_unlocked:
+    /*
+     * This is fine to ignore the preemption here because only the root
+     * will be allocated by p2m_alloc_table().
+     */
     if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
-        p2m_teardown(p2m, true);
+        p2m_teardown(p2m, true, NULL);
     if ( rv != 0 && pg != NULL )
     {
         pg->count_info &= ~PGC_count_mask;
@@ -2824,7 +2828,9 @@ void shadow_teardown(struct domain *d, bool *preempted)
     for_each_vcpu ( d, v )
         shadow_vcpu_teardown(v);
 
-    p2m_teardown(p2m_get_hostp2m(d), false);
+    p2m_teardown(p2m_get_hostp2m(d), false, preempted);
+    if ( preempted && *preempted )
+        return;
 
     paging_lock(d);
 
@@ -2945,7 +2951,7 @@ void shadow_final_teardown(struct domain *d)
         shadow_teardown(d, NULL);
 
     /* It is now safe to pull down the p2m map. */
-    p2m_teardown(p2m_get_hostp2m(d), true);
+    p2m_teardown(p2m_get_hostp2m(d), true, NULL);
     /* Free any shadow memory that the p2m teardown released */
     paging_lock(d);
     shadow_set_allocation(d, 0, NULL);
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index c3c16748e7d5..2db9ab0122f2 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -574,7 +574,7 @@ int p2m_init(struct domain *d);
 int p2m_alloc_table(struct p2m_domain *p2m);
 
 /* Return all the p2m resources to Xen. */
-void p2m_teardown(struct p2m_domain *p2m, bool remove_root);
+void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted);
 void p2m_final_teardown(struct domain *d);
 
 /* Add a page to a domain's p2m table */
-- 
2.37.4