diff options
Diffstat (limited to '0019-tools-oxenstored-Keep-dev-xen-evtchn-open-across-liv.patch')
-rw-r--r-- | 0019-tools-oxenstored-Keep-dev-xen-evtchn-open-across-liv.patch | 367 |
1 files changed, 367 insertions, 0 deletions
diff --git a/0019-tools-oxenstored-Keep-dev-xen-evtchn-open-across-liv.patch b/0019-tools-oxenstored-Keep-dev-xen-evtchn-open-across-liv.patch new file mode 100644 index 0000000..f6ae3fe --- /dev/null +++ b/0019-tools-oxenstored-Keep-dev-xen-evtchn-open-across-liv.patch @@ -0,0 +1,367 @@ +From f02171b663393e10d35123e5572c0f5b3e72c29d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com> +Date: Thu, 3 Nov 2022 15:31:39 +0000 +Subject: [PATCH 19/89] tools/oxenstored: Keep /dev/xen/evtchn open across live + update +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Closing the evtchn handle will unbind and free all local ports. The new +xenstored would need to rebind all evtchns, which is work that we don't want +or need to be doing during the critical handover period. + +However, it turns out that the Windows PV drivers also rebind their local port +too across suspend/resume, leaving (o)xenstored with a stale idea of the +remote port to use. In this case, reusing the established connection is the +only robust option. + +Therefore: + * Have oxenstored open /dev/xen/evtchn without CLOEXEC at start of day. + * Extend the handover information with the evtchn fd, domexc virq local port, + and the local port number for each domain connection. + * Have (the new) oxenstored recover the open handle using Xeneventchn.fdopen, + and use the provided local ports rather than trying to rebind them. + +When this new information isn't present (i.e. live updating from an oxenstored +prior to this change), the best-effort status quo will have to do. + +Signed-off-by: Edwin Török <edvin.torok@citrix.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Acked-by: Christian Lindig <christian.lindig@citrix.com> +(cherry picked from commit 9b224c25293a53fcbe32da68052d861dda71a6f4) +--- + tools/ocaml/xenstored/domain.ml | 13 +++-- + tools/ocaml/xenstored/domains.ml | 9 ++-- + tools/ocaml/xenstored/event.ml | 20 +++++-- + tools/ocaml/xenstored/process.ml | 2 +- + tools/ocaml/xenstored/xenstored.ml | 85 ++++++++++++++++++++---------- + 5 files changed, 90 insertions(+), 39 deletions(-) + +diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml +index 481e10794d..5c15752a37 100644 +--- a/tools/ocaml/xenstored/domain.ml ++++ b/tools/ocaml/xenstored/domain.ml +@@ -74,7 +74,8 @@ let is_paused_for_conflict dom = dom.conflict_credit <= 0.0 + let is_free_to_conflict = is_dom0 + + let dump d chan = +- fprintf chan "dom,%d,%nd,%d\n" d.id d.mfn d.ports.remote ++ fprintf chan "dom,%d,%nd,%d,%d\n" ++ d.id d.mfn d.ports.remote (Xeneventchn.to_int d.ports.local) + + let rebind_evtchn d remote_port = + Event.unbind d.eventchn d.ports.local; +@@ -93,8 +94,14 @@ let close dom = + dom.ports <- invalid_ports; + Xenmmap.unmap dom.interface + +-let make id mfn remote_port interface eventchn = +- let local = Event.bind_interdomain eventchn id remote_port in ++(* On clean start, local_port will be None, and we must bind the remote port ++ given. On Live Update, the event channel is already bound, and both the ++ local and remote port numbers come from the transfer record. *) ++let make ?local_port ~remote_port id mfn interface eventchn = ++ let local = match local_port with ++ | None -> Event.bind_interdomain eventchn id remote_port ++ | Some p -> Xeneventchn.of_int p ++ in + let ports = { local; remote = remote_port } in + debug "domain %d bind %s" id (string_of_port_pair ports); + { +diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml +index 2ab0c5f4d8..b6c075c838 100644 +--- a/tools/ocaml/xenstored/domains.ml ++++ b/tools/ocaml/xenstored/domains.ml +@@ -56,6 +56,7 @@ let exist doms id = Hashtbl.mem doms.table id + let find doms id = Hashtbl.find doms.table id + let number doms = Hashtbl.length doms.table + let iter doms fct = Hashtbl.iter (fun _ b -> fct b) doms.table ++let eventchn doms = doms.eventchn + + let rec is_empty_queue q = + Queue.is_empty q || +@@ -122,16 +123,16 @@ let cleanup doms = + let resume _doms _domid = + () + +-let create doms domid mfn remote_port = ++let create doms ?local_port ~remote_port domid mfn = + let interface = Xenctrl.map_foreign_range xc domid (Xenmmap.getpagesize()) mfn in +- let dom = Domain.make domid mfn remote_port interface doms.eventchn in ++ let dom = Domain.make ?local_port ~remote_port domid mfn interface doms.eventchn in + Hashtbl.add doms.table domid dom; + dom + + let xenstored_kva = ref "" + let xenstored_port = ref "" + +-let create0 doms = ++let create0 ?local_port doms = + let remote_port = Utils.read_file_single_integer !xenstored_port in + + let interface = +@@ -141,7 +142,7 @@ let create0 doms = + interface + in + +- let dom = Domain.make 0 Nativeint.zero remote_port interface doms.eventchn in ++ let dom = Domain.make ?local_port ~remote_port 0 Nativeint.zero interface doms.eventchn in + Hashtbl.add doms.table 0 dom; + Domain.notify dom; + dom +diff --git a/tools/ocaml/xenstored/event.ml b/tools/ocaml/xenstored/event.ml +index a3be296374..629dc6041b 100644 +--- a/tools/ocaml/xenstored/event.ml ++++ b/tools/ocaml/xenstored/event.ml +@@ -20,9 +20,18 @@ type t = { + domexc: Xeneventchn.t; + } + +-let init () = +- let handle = Xeneventchn.init () in +- let domexc = Xeneventchn.bind_dom_exc_virq handle in ++(* On clean start, both parameters will be None, and we must open the evtchn ++ handle and bind the DOM_EXC VIRQ. On Live Update, the fd is preserved ++ across exec(), and the DOM_EXC VIRQ still bound. *) ++let init ?fd ?domexc_port () = ++ let handle = match fd with ++ | None -> Xeneventchn.init ~cloexec:false () ++ | Some fd -> fd |> Utils.FD.of_int |> Xeneventchn.fdopen ++ in ++ let domexc = match domexc_port with ++ | None -> Xeneventchn.bind_dom_exc_virq handle ++ | Some p -> Xeneventchn.of_int p ++ in + { handle; domexc } + + let fd eventchn = Xeneventchn.fd eventchn.handle +@@ -31,3 +40,8 @@ let unbind eventchn port = Xeneventchn.unbind eventchn.handle port + let notify eventchn port = Xeneventchn.notify eventchn.handle port + let pending eventchn = Xeneventchn.pending eventchn.handle + let unmask eventchn port = Xeneventchn.unmask eventchn.handle port ++ ++let dump e chan = ++ Printf.fprintf chan "evtchn-dev,%d,%d\n" ++ (Utils.FD.to_int @@ Xeneventchn.fd e.handle) ++ (Xeneventchn.to_int e.domexc) +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 1c80e7198d..02bd0f7d80 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -573,7 +573,7 @@ let do_introduce con t domains cons data = + end; + edom + else try +- let ndom = Domains.create domains domid mfn remote_port in ++ let ndom = Domains.create ~remote_port domains domid mfn in + Connections.add_domain cons ndom; + Connections.fire_spec_watches (Transaction.get_root t) cons Store.Path.introduce_domain; + ndom +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index 1f11f576b5..f526f4fb23 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -144,7 +144,7 @@ exception Bad_format of string + + let dump_format_header = "$xenstored-dump-format" + +-let from_channel_f chan global_f socket_f domain_f watch_f store_f = ++let from_channel_f chan global_f evtchn_f socket_f domain_f watch_f store_f = + let unhexify s = Utils.unhexify s in + let getpath s = + let u = Utils.unhexify s in +@@ -165,12 +165,19 @@ let from_channel_f chan global_f socket_f domain_f watch_f store_f = + (* there might be more parameters here, + e.g. a RO socket from a previous version: ignore it *) + global_f ~rw ++ | "evtchn-dev" :: fd :: domexc_port :: [] -> ++ evtchn_f ~fd:(int_of_string fd) ++ ~domexc_port:(int_of_string domexc_port) + | "socket" :: fd :: [] -> + socket_f ~fd:(int_of_string fd) +- | "dom" :: domid :: mfn :: remote_port :: []-> +- domain_f (int_of_string domid) +- (Nativeint.of_string mfn) +- (int_of_string remote_port) ++ | "dom" :: domid :: mfn :: remote_port :: rest -> ++ let local_port = match rest with ++ | [] -> None (* backward compat: old version didn't have it *) ++ | local_port :: _ -> Some (int_of_string local_port) in ++ domain_f ?local_port ++ ~remote_port:(int_of_string remote_port) ++ (int_of_string domid) ++ (Nativeint.of_string mfn) + | "watch" :: domid :: path :: token :: [] -> + watch_f (int_of_string domid) + (unhexify path) (unhexify token) +@@ -189,10 +196,21 @@ let from_channel_f chan global_f socket_f domain_f watch_f store_f = + done; + info "Completed loading xenstore dump" + +-let from_channel store cons doms chan = ++let from_channel store cons domains_init chan = + (* don't let the permission get on our way, full perm ! *) + let op = Store.get_ops store Perms.Connection.full_rights in + let rwro = ref (None) in ++ let doms = ref (None) in ++ ++ let require_doms () = ++ match !doms with ++ | None -> ++ warn "No event channel file descriptor available in dump!"; ++ let domains = domains_init @@ Event.init () in ++ doms := Some domains; ++ domains ++ | Some d -> d ++ in + let global_f ~rw = + let get_listen_sock sockfd = + let fd = sockfd |> int_of_string |> Utils.FD.of_int in +@@ -201,6 +219,10 @@ let from_channel store cons doms chan = + in + rwro := get_listen_sock rw + in ++ let evtchn_f ~fd ~domexc_port = ++ let evtchn = Event.init ~fd ~domexc_port () in ++ doms := Some(domains_init evtchn) ++ in + let socket_f ~fd = + let ufd = Utils.FD.of_int fd in + let is_valid = try (Unix.fstat ufd).Unix.st_kind = Unix.S_SOCK with _ -> false in +@@ -209,12 +231,13 @@ let from_channel store cons doms chan = + else + warn "Ignoring invalid socket FD %d" fd + in +- let domain_f domid mfn remote_port = ++ let domain_f ?local_port ~remote_port domid mfn = ++ let doms = require_doms () in + let ndom = + if domid > 0 then +- Domains.create doms domid mfn remote_port ++ Domains.create ?local_port ~remote_port doms domid mfn + else +- Domains.create0 doms ++ Domains.create0 ?local_port doms + in + Connections.add_domain cons ndom; + in +@@ -229,8 +252,8 @@ let from_channel store cons doms chan = + op.Store.write path value; + op.Store.setperms path perms + in +- from_channel_f chan global_f socket_f domain_f watch_f store_f; +- !rwro ++ from_channel_f chan global_f evtchn_f socket_f domain_f watch_f store_f; ++ !rwro, require_doms () + + let from_file store cons doms file = + info "Loading xenstore dump from %s" file; +@@ -238,7 +261,7 @@ let from_file store cons doms file = + finally (fun () -> from_channel store doms cons channel) + (fun () -> close_in channel) + +-let to_channel store cons rw chan = ++let to_channel store cons (rw, evtchn) chan = + let hexify s = Utils.hexify s in + + fprintf chan "%s\n" dump_format_header; +@@ -248,6 +271,9 @@ let to_channel store cons rw chan = + Utils.FD.to_int fd in + fprintf chan "global,%d\n" (fdopt rw); + ++ (* dump evtchn device info *) ++ Event.dump evtchn chan; ++ + (* dump connections related to domains: domid, mfn, eventchn port/ sockets, and watches *) + Connections.iter cons (fun con -> Connection.dump con chan); + +@@ -367,7 +393,6 @@ let _ = + | None -> () end; + + let store = Store.create () in +- let eventchn = Event.init () in + let next_frequent_ops = ref 0. in + let advance_next_frequent_ops () = + next_frequent_ops := (Unix.gettimeofday () +. !Define.conflict_max_history_seconds) +@@ -375,16 +400,8 @@ let _ = + let delay_next_frequent_ops_by duration = + next_frequent_ops := !next_frequent_ops +. duration + in +- let domains = Domains.init eventchn advance_next_frequent_ops in ++ let domains_init eventchn = Domains.init eventchn advance_next_frequent_ops in + +- (* For things that need to be done periodically but more often +- * than the periodic_ops function *) +- let frequent_ops () = +- if Unix.gettimeofday () > !next_frequent_ops then ( +- History.trim (); +- Domains.incr_conflict_credit domains; +- advance_next_frequent_ops () +- ) in + let cons = Connections.create () in + + let quit = ref false in +@@ -393,14 +410,15 @@ let _ = + List.iter (fun path -> + Store.write store Perms.Connection.full_rights path "") Store.Path.specials; + +- let rw_sock = ++ let rw_sock, domains = + if cf.restart && Sys.file_exists Disk.xs_daemon_database then ( +- let rwro = DB.from_file store domains cons Disk.xs_daemon_database in ++ let rw, domains = DB.from_file store domains_init cons Disk.xs_daemon_database in + info "Live reload: database loaded"; + Process.LiveUpdate.completed (); +- rwro ++ rw, domains + ) else ( + info "No live reload: regular startup"; ++ let domains = domains_init @@ Event.init () in + if !Disk.enable then ( + info "reading store from disk"; + Disk.read store +@@ -413,9 +431,18 @@ let _ = + if cf.domain_init then ( + Connections.add_domain cons (Domains.create0 domains); + ); +- rw_sock ++ rw_sock, domains + ) in + ++ (* For things that need to be done periodically but more often ++ * than the periodic_ops function *) ++ let frequent_ops () = ++ if Unix.gettimeofday () > !next_frequent_ops then ( ++ History.trim (); ++ Domains.incr_conflict_credit domains; ++ advance_next_frequent_ops () ++ ) in ++ + (* required for xenstore-control to detect availability of live-update *) + let tool_path = Store.Path.of_string "/tool" in + if not (Store.path_exists store tool_path) then +@@ -430,8 +457,10 @@ let _ = + Sys.set_signal Sys.sigusr1 (Sys.Signal_handle (fun _ -> sigusr1_handler store)); + Sys.set_signal Sys.sigpipe Sys.Signal_ignore; + ++ let eventchn = Domains.eventchn domains in ++ + if cf.activate_access_log then begin +- let post_rotate () = DB.to_file store cons (None) Disk.xs_daemon_database in ++ let post_rotate () = DB.to_file store cons (None, eventchn) Disk.xs_daemon_database in + Logging.init_access_log post_rotate + end; + +@@ -593,7 +622,7 @@ let _ = + live_update := Process.LiveUpdate.should_run cons; + if !live_update || !quit then begin + (* don't initiate live update if saving state fails *) +- DB.to_file store cons (rw_sock) Disk.xs_daemon_database; ++ DB.to_file store cons (rw_sock, eventchn) Disk.xs_daemon_database; + quit := true; + end + with exc -> +-- +2.40.0 + |