diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index beba8ed314b67..c50cee9f14e73 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -1552,6 +1552,7 @@ pub enum ExprKind { pub enum ForLoopKind { For, ForAwait, + CilkFor, } /// Used to differentiate between `async {}` blocks and `gen {}` blocks. diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index 59e593c1e7dcd..21e305db2bddf 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -1606,10 +1606,15 @@ impl<'hir> LoweringContext<'_, 'hir> { }; // Some() => , + // For cilk_for we instead perform Some() => cilk_spawn . let some_arm = { let some_pat = self.pat_some(pat_span, pat); let body_block = self.with_loop_scope(e.id, |this| this.lower_block(body, false)); - let body_expr = self.arena.alloc(self.expr_block(body_block)); + let body_expr = if matches!(loop_kind, ForLoopKind::CilkFor) { + self.arena.alloc(self.expr_spawn_block(body_block)) + } else { + self.arena.alloc(self.expr_block(body_block)) + }; self.arm(some_pat, body_expr) }; @@ -1621,7 +1626,7 @@ impl<'hir> LoweringContext<'_, 'hir> { let match_expr = { let iter = self.expr_ident(head_span, iter, iter_pat_nid); let next_expr = match loop_kind { - ForLoopKind::For => { + ForLoopKind::For | ForLoopKind::CilkFor => { // `Iterator::next(&mut iter)` let ref_mut_iter = self.expr_mut_addr_of(head_span, iter); self.expr_call_lang_item_fn( @@ -1660,10 +1665,17 @@ impl<'hir> LoweringContext<'_, 'hir> { let loop_block = self.block_all(for_span, arena_vec![self; match_stmt], None); // `[opt_ident]: loop { ... }` + // If we have a cilk_for we want to indicate this in the loop so the header can be + // annotated correctly later. + let loop_source = if matches!(loop_kind, ForLoopKind::CilkFor) { + hir::LoopSource::CilkFor + } else { + hir::LoopSource::ForLoop + }; let kind = hir::ExprKind::Loop( loop_block, self.lower_label(opt_label), - hir::LoopSource::ForLoop, + loop_source, self.lower_span(for_span.with_hi(head.span.hi())), ); let loop_expr = @@ -1700,6 +1712,17 @@ impl<'hir> LoweringContext<'_, 'hir> { let iter = self.arena.alloc(self.expr_unsafe(iter)); iter } + ForLoopKind::CilkFor => { + // FIXME(jhilton): this should call something for converting to a random-access iterator + // or otherwise semantically check that the expression is a random-access iterator (or + // only work on ranges for now). + // `::std::iter::IntoIterator::into_iter()` + self.expr_call_lang_item_fn( + head_span, + hir::LangItem::IntoIterIntoIter, + arena_vec![self; head], + ) + } }; let match_expr = self.arena.alloc(self.expr_match( @@ -2059,6 +2082,11 @@ impl<'hir> LoweringContext<'_, 'hir> { self.expr(b.span, hir::ExprKind::Block(b, None)) } + fn expr_spawn_block(&mut self, b: &'hir hir::Block<'hir>) -> hir::Expr<'hir> { + let block = self.arena.alloc(self.expr_block(b)); + self.expr(b.span, hir::ExprKind::CilkSpawn(block)) + } + pub(super) fn expr_array_ref( &mut self, span: Span, diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index d5212b041634b..7be9ecb7d164b 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -186,10 +186,8 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { } } - fn br(&mut self, dest: &'ll BasicBlock) { - unsafe { - llvm::LLVMBuildBr(self.llbuilder, dest); - } + fn br(&mut self, dest: &'ll BasicBlock) -> &'ll Value { + unsafe { llvm::LLVMBuildBr(self.llbuilder, dest) } } fn cond_br( @@ -681,6 +679,42 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { } } + fn tapir_loop_spawn_strategy_metadata(&mut self, branch: &'ll Value) { + // NOTE(jhilton): I think we can't use llvm.loop.mustprogress since ranges can be unbounded. + const TAPIR_LOOP_SPAWN_STRATEGY: &'static str = "tapir.loop.spawn.strategy"; + const LOOP_DIVIDE_AND_CONQUER: i32 = 1; + unsafe { + let temp = llvm::TemporaryMetadataNode::new(self.cx.llcx); + // First we need to make the metadata for tapir.loop.spawn.strategy. + let metadata_name = llvm::LLVMMDStringInContext2( + self.cx.llcx, + TAPIR_LOOP_SPAWN_STRATEGY.as_ptr() as *const c_char, + TAPIR_LOOP_SPAWN_STRATEGY.as_bytes().len(), + ); + // Now we need to make the value for the divide-and-conquer strategy. + let metadata_value = llvm::LLVMValueAsMetadata(self.const_i32(LOOP_DIVIDE_AND_CONQUER)); + // Bundle them together into a single node. + let tapir_metadata = [metadata_name, metadata_value]; + let tapir_metadata = llvm::LLVMMDNodeInContext2( + self.cx.llcx, + tapir_metadata.as_ptr(), + tapir_metadata.len(), + ); + + // Now group the loop with all its associated nodes and assign the node to the branch. + let v = [&temp, tapir_metadata]; + let node = llvm::LLVMMDNodeInContext2(self.cx.llcx, v.as_ptr(), v.len()); + // Make the node self-referential: this is what loops expect. + llvm::LLVMRustReplaceMDOperandWith(node, 0, node); + + llvm::LLVMSetMetadata( + branch, + llvm::MD_loop as c_uint, + llvm::LLVMMetadataAsValue(self.cx.llcx, node), + ) + } + } + fn store(&mut self, val: &'ll Value, ptr: &'ll Value, align: Align) -> &'ll Value { self.store_with_flags(val, ptr, align, MemFlags::empty()) } diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 5a05d13e9dc95..ca40593674e7a 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -428,6 +428,7 @@ pub enum MetadataType { MD_mem_parallel_loop_access = 10, MD_nonnull = 11, MD_align = 17, + MD_loop = 18, MD_type = 19, MD_vcall_visibility = 28, MD_noundef = 29, @@ -899,6 +900,17 @@ extern "C" { pub fn LLVMSetMetadata<'a>(Val: &'a Value, KindID: c_uint, Node: &'a Value); pub fn LLVMGlobalSetMetadata<'a>(Val: &'a Value, KindID: c_uint, Metadata: &'a Metadata); pub fn LLVMValueAsMetadata(Node: &Value) -> &Metadata; + pub fn LLVMRustMDGetTemporary(C: &Context) -> &mut Metadata; + pub fn LLVMRustMDDeleteTemporary(Metadata: &mut Metadata); + /// SAFETY: there should exist exactly one reference to the value passed as [Metadata], + /// unless a self-reference is being created, in which case [NewMetadata] may be equal to + /// [Metadata]. + /// We can't use an exclusive borrow to model this constraint. + pub fn LLVMRustReplaceMDOperandWith<'a>( + Metadata: &'a Metadata, + Index: size_t, + NewMetadata: &'a Metadata, + ); // Operations on constants of any type pub fn LLVMConstNull(Ty: &Type) -> &Value; diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs index 4f5cc575da6e5..6cf5830ffcd57 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs @@ -13,6 +13,7 @@ use rustc_data_structures::small_c_str::SmallCStr; use rustc_llvm::RustString; use std::cell::RefCell; use std::ffi::{CStr, CString}; +use std::ops::Deref; use std::str::FromStr; use std::string::FromUtf8Error; @@ -324,3 +325,33 @@ impl Drop for OperandBundleDef<'_> { } } } + +pub struct TemporaryMetadataNode<'a> { + raw: core::ptr::NonNull, + phantom: core::marker::PhantomData<&'a mut Metadata>, +} + +impl<'a> TemporaryMetadataNode<'a> { + pub fn new(llcx: &'a Context) -> Self { + let temp = unsafe { ffi::LLVMRustMDGetTemporary(llcx) }; + Self { raw: temp.into(), phantom: core::marker::PhantomData } + } +} + +impl Deref for TemporaryMetadataNode<'_> { + type Target = Metadata; + + fn deref(&self) -> &Self::Target { + unsafe { self.raw.as_ref() } + } +} + +impl Drop for TemporaryMetadataNode<'_> { + fn drop(&mut self) { + // SAFETY: raw originated from LLVMRustMDGetTemporary so it should be aligned. + // Should also be dereferencable, initialized, and non-aliasing since we don't + // expose raw outside the module.. + let raw = unsafe { self.raw.as_mut() }; + unsafe { ffi::LLVMRustMDDeleteTemporary(raw) } + } +} diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs index a9276ce116fda..9aa4683c1b071 100644 --- a/compiler/rustc_codegen_ssa/src/mir/block.rs +++ b/compiler/rustc_codegen_ssa/src/mir/block.rs @@ -33,6 +33,11 @@ enum MergingSucc { True, } +enum AddParallelLoopMetadata { + False, + True, +} + /// Used by `FunctionCx::codegen_terminator` for emitting common patterns /// e.g., creating a basic block, calling a function, etc. struct TerminatorCodegenHelper<'tcx> { @@ -124,6 +129,23 @@ impl<'a, 'tcx> TerminatorCodegenHelper<'tcx> { bx: &mut Bx, target: mir::BasicBlock, mergeable_succ: bool, + ) -> MergingSucc { + self.funclet_br_maybe_add_metadata( + fx, + bx, + target, + mergeable_succ, + AddParallelLoopMetadata::False, + ) + } + + fn funclet_br_maybe_add_metadata>( + &self, + fx: &mut FunctionCx<'a, 'tcx, Bx>, + bx: &mut Bx, + target: mir::BasicBlock, + mergeable_succ: bool, + add_parallel_loop_metadata: AddParallelLoopMetadata, ) -> MergingSucc { let (needs_landing_pad, is_cleanupret) = self.llbb_characteristics(fx, target); if mergeable_succ && !needs_landing_pad && !is_cleanupret { @@ -139,7 +161,10 @@ impl<'a, 'tcx> TerminatorCodegenHelper<'tcx> { // to a trampoline. bx.cleanup_ret(self.funclet(fx).unwrap(), Some(lltarget)); } else { - bx.br(lltarget); + let inst = bx.br(lltarget); + if matches!(add_parallel_loop_metadata, AddParallelLoopMetadata::True) { + bx.tapir_loop_spawn_strategy_metadata(inst); + } } MergingSucc::False } @@ -1224,7 +1249,24 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { } mir::TerminatorKind::Goto { target } => { - helper.funclet_br(self, bx, target, mergeable_succ()) + let add_tapir_metadata = if self.parallel_back_edges.contains(bb) { + AddParallelLoopMetadata::True + } else { + AddParallelLoopMetadata::False + }; + // NOTE(jhilton): we attach the metadata to the parallel loop back edge. + // This is because in the structure of a loop, the back edge is more + // fundamental than the header and because the way LLVM detects loop metadata + // is by canonicalizing then checking the back edge. By adding the metadata to + // the back edge, we don't rely on the canonicalization succeeding, so it's a + // little less fragile (although I expect the canonicalization is very robust). + helper.funclet_br_maybe_add_metadata( + self, + bx, + target, + mergeable_succ(), + add_tapir_metadata, + ) } mir::TerminatorKind::SwitchInt { ref discr, ref targets } => { diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs index eacfb37ff48eb..1515f149e9caa 100644 --- a/compiler/rustc_codegen_ssa/src/mir/mod.rs +++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs @@ -119,6 +119,10 @@ pub struct FunctionCx<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> { /// we add a statement to compute the sync region. sync_region: Option, + /// We need to know the basic blocks terminated by back-edges that go into + /// parallel loop headers. This is so we can annotate with the right metadata. + parallel_back_edges: BitSet, + /// A stack of values returned from `tapir_runtime_start` for use in their corresponding `tapir_runtime_stop` /// call. We might need a more complex data structure than this if the token should ever be reused, but it's /// my impression that that isn't the case. @@ -221,6 +225,7 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( per_local_var_debug_info: None, caller_location: None, sync_region: None, + parallel_back_edges: BitSet::new_empty(mir.basic_blocks.len()), runtime_hint_stack: SmallVec::new(), }; @@ -282,9 +287,30 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( }) }; + let parallel_back_edges = || { + let mut seen = rustc_data_structures::fx::FxHashSet::default(); + mir::traversal::reverse_postorder(mir).filter_map(move |(bb, bb_data)| { + seen.insert(bb); + if let mir::TerminatorKind::Goto { target } = bb_data.terminator().kind { + // If the target of the jump is a parallel loop header and we've already observed + // it, we know this must be a loop back-edge. + if mir.basic_blocks[target].is_parallel_loop_header && seen.contains(&target) { + return Some(bb); + } + } + None + }) + }; + if Bx::supports_tapir() && uses_cilk_control_flow() { // Add a sync region at the top of the function, so we can use it later. fx.sync_region = Some(start_bx.sync_region_start()); + + // Let's figure out the parallel back-edges. These are edges into parallel + // loop headers. + parallel_back_edges().for_each(|bb| { + fx.parallel_back_edges.insert(bb); + }); } // The builders will be created separately for each basic block at `codegen_block`. diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs index 01a6e6c14d759..ed501bbd09533 100644 --- a/compiler/rustc_codegen_ssa/src/traits/builder.rs +++ b/compiler/rustc_codegen_ssa/src/traits/builder.rs @@ -58,7 +58,7 @@ pub trait BuilderMethods<'a, 'tcx>: fn ret_void(&mut self); fn ret(&mut self, v: Self::Value); - fn br(&mut self, dest: Self::BasicBlock); + fn br(&mut self, dest: Self::BasicBlock) -> Self::Value; fn cond_br( &mut self, cond: Self::Value, @@ -170,6 +170,7 @@ pub trait BuilderMethods<'a, 'tcx>: fn range_metadata(&mut self, load: Self::Value, range: WrappingRange); fn nonnull_metadata(&mut self, load: Self::Value); + fn tapir_loop_spawn_strategy_metadata(&mut self, branch: Self::Value); fn store(&mut self, val: Self::Value, ptr: Self::Value, align: Align) -> Self::Value; fn store_with_flags( diff --git a/compiler/rustc_hir/src/hir.rs b/compiler/rustc_hir/src/hir.rs index 6d64e9cc3b9b2..08de810fa7c61 100644 --- a/compiler/rustc_hir/src/hir.rs +++ b/compiler/rustc_hir/src/hir.rs @@ -2055,6 +2055,8 @@ pub enum LoopSource { While, /// A `for _ in _ { .. }` loop. ForLoop, + /// A `cilk_for _ in _ { .. }` loop. + CilkFor, } impl LoopSource { @@ -2063,6 +2065,7 @@ impl LoopSource { LoopSource::Loop => "loop", LoopSource::While => "while", LoopSource::ForLoop => "for", + LoopSource::CilkFor => "cilk_for", } } } diff --git a/compiler/rustc_hir_typeck/src/expr.rs b/compiler/rustc_hir_typeck/src/expr.rs index be1f24c384633..13b906fad57a1 100644 --- a/compiler/rustc_hir_typeck/src/expr.rs +++ b/compiler/rustc_hir_typeck/src/expr.rs @@ -1296,6 +1296,8 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { expected: Expectation<'tcx>, expr: &'tcx hir::Expr<'tcx>, ) -> Ty<'tcx> { + // FIXME(jhilton): might be a good place to enforce the semantic rules for a cilk_for. + // Not sure how to implement `continue` here, disallowing is the conservative option. let coerce = match source { // you can only use break with a value from a normal `loop { }` hir::LoopSource::Loop => { @@ -1303,7 +1305,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { Some(CoerceMany::new(coerce_to)) } - hir::LoopSource::While | hir::LoopSource::ForLoop => None, + hir::LoopSource::While | hir::LoopSource::ForLoop | hir::LoopSource::CilkFor => None, }; let ctxt = BreakableCtxt { diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index b45706fd1e5b2..63c647ebf6e1b 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -764,6 +764,20 @@ extern "C" void LLVMRustGlobalAddMetadata( unwrap(Global)->addMetadata(Kind, *unwrap(MD)); } +extern "C" LLVMMetadataRef LLVMRustMDGetTemporary(LLVMContextRef C) { + return wrap(llvm::MDNode::getTemporary(*unwrap(C), std::nullopt).release()); +} + +extern "C" void LLVMRustMDDeleteTemporary(LLVMMetadataRef MD) { + llvm::MDNode::deleteTemporary(unwrap(MD)); +} + +extern "C" void LLVMRustReplaceMDOperandWith( + LLVMMetadataRef MD, size_t Index, LLVMMetadataRef New +) { + unwrap(MD)->replaceOperandWith(Index, unwrap(New)); +} + extern "C" LLVMRustDIBuilderRef LLVMRustDIBuilderCreate(LLVMModuleRef M) { return new DIBuilder(*unwrap(M)); } diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index 3017f912ef027..1fe9ec805e411 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -1276,11 +1276,21 @@ pub struct BasicBlockData<'tcx> { /// generated (particularly for MSVC cleanup). Unwind blocks must /// only branch to other unwind blocks. pub is_cleanup: bool, + + /// If true, this block is the loop header of a parallel loop. This + /// is used during codegen to provide appropriate attributes for + /// efficient Tapir parallel loops. + pub is_parallel_loop_header: bool, } impl<'tcx> BasicBlockData<'tcx> { pub fn new(terminator: Option>) -> BasicBlockData<'tcx> { - BasicBlockData { statements: vec![], terminator, is_cleanup: false } + BasicBlockData { + statements: vec![], + terminator, + is_cleanup: false, + is_parallel_loop_header: false, + } } /// Accessor for terminator. diff --git a/compiler/rustc_middle/src/mir/patch.rs b/compiler/rustc_middle/src/mir/patch.rs index b81e9fa1aab64..376cf63e00601 100644 --- a/compiler/rustc_middle/src/mir/patch.rs +++ b/compiler/rustc_middle/src/mir/patch.rs @@ -73,6 +73,7 @@ impl<'tcx> MirPatch<'tcx> { kind: TerminatorKind::UnwindResume, }), is_cleanup: true, + is_parallel_loop_header: false, }); self.resume_block = Some(bb); bb @@ -90,6 +91,7 @@ impl<'tcx> MirPatch<'tcx> { kind: TerminatorKind::Unreachable, }), is_cleanup: true, + is_parallel_loop_header: false, }); self.unreachable_cleanup_block = Some(bb); bb @@ -109,6 +111,7 @@ impl<'tcx> MirPatch<'tcx> { kind: TerminatorKind::UnwindTerminate(reason), }), is_cleanup: true, + is_parallel_loop_header: false, }); self.terminate_block = Some((bb, reason)); bb diff --git a/compiler/rustc_middle/src/mir/pretty.rs b/compiler/rustc_middle/src/mir/pretty.rs index d7c6a0a9d5f2a..119b5d2b0d391 100644 --- a/compiler/rustc_middle/src/mir/pretty.rs +++ b/compiler/rustc_middle/src/mir/pretty.rs @@ -588,7 +588,8 @@ where // Basic block label at the top. let cleanup_text = if data.is_cleanup { " (cleanup)" } else { "" }; - writeln!(w, "{INDENT}{block:?}{cleanup_text}: {{")?; + let parallel_loop_header = if data.is_parallel_loop_header { " (parallel-header)" } else { "" }; + writeln!(w, "{INDENT}{block:?}{cleanup_text}{parallel_loop_header}: {{")?; // List of statements in the middle. let mut current_location = Location { block, statement_index: 0 }; diff --git a/compiler/rustc_middle/src/mir/visit.rs b/compiler/rustc_middle/src/mir/visit.rs index 4e2b5fbb62e7a..eb0276b9b55a0 100644 --- a/compiler/rustc_middle/src/mir/visit.rs +++ b/compiler/rustc_middle/src/mir/visit.rs @@ -298,7 +298,8 @@ macro_rules! make_mir_visitor { let BasicBlockData { statements, terminator, - is_cleanup: _ + is_cleanup: _, + is_parallel_loop_header: _ } = data; let mut index = 0; diff --git a/compiler/rustc_middle/src/thir.rs b/compiler/rustc_middle/src/thir.rs index c6c4458153122..3ebaeee05ed9c 100644 --- a/compiler/rustc_middle/src/thir.rs +++ b/compiler/rustc_middle/src/thir.rs @@ -337,6 +337,9 @@ pub enum ExprKind<'tcx> { /// A `loop` expression. Loop { body: ExprId, + /// Whether to use the Tapir loop spawning strategy for this loop. Only true + /// when this loop originated from a `cilk_for`. + tapir_loop_spawn: bool, }, Let { expr: ExprId, diff --git a/compiler/rustc_middle/src/thir/visit.rs b/compiler/rustc_middle/src/thir/visit.rs index 3f574c135339c..23d8c6b2ffefd 100644 --- a/compiler/rustc_middle/src/thir/visit.rs +++ b/compiler/rustc_middle/src/thir/visit.rs @@ -73,7 +73,7 @@ pub fn walk_expr<'thir, 'tcx: 'thir, V: Visitor<'thir, 'tcx>>( visitor.visit_expr(&visitor.thir()[expr]); visitor.visit_pat(pat); } - Loop { body } => visitor.visit_expr(&visitor.thir()[body]), + Loop { body, tapir_loop_spawn: _ } => visitor.visit_expr(&visitor.thir()[body]), Match { scrutinee, ref arms, .. } => { visitor.visit_expr(&visitor.thir()[scrutinee]); for &arm in &**arms { diff --git a/compiler/rustc_mir_build/src/build/cfg.rs b/compiler/rustc_mir_build/src/build/cfg.rs index 2bd0e28973101..5a152171143fc 100644 --- a/compiler/rustc_mir_build/src/build/cfg.rs +++ b/compiler/rustc_mir_build/src/build/cfg.rs @@ -20,6 +20,12 @@ impl<'tcx> CFG<'tcx> { self.basic_blocks.push(BasicBlockData::new(None)) } + pub(crate) fn start_new_parallel_loop_header(&mut self) -> BasicBlock { + let mut block_data = BasicBlockData::new(None); + block_data.is_parallel_loop_header = true; + self.basic_blocks.push(block_data) + } + pub(crate) fn start_new_cleanup_block(&mut self) -> BasicBlock { let bb = self.start_new_block(); self.block_data_mut(bb).is_cleanup = true; diff --git a/compiler/rustc_mir_build/src/build/expr/into.rs b/compiler/rustc_mir_build/src/build/expr/into.rs index d09bfc8695d79..c055b0ab1dec5 100644 --- a/compiler/rustc_mir_build/src/build/expr/into.rs +++ b/compiler/rustc_mir_build/src/build/expr/into.rs @@ -204,7 +204,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { this.cfg.goto(short_circuit, source_info, target); target.unit() } - ExprKind::Loop { body } => { + ExprKind::Loop { body, tapir_loop_spawn } => { // [block] // | // [loop_block] -> [body_block] -/eval. body/-> [body_block_end] @@ -215,34 +215,68 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { // The false link is required to make sure borrowck considers unwinds through the // body, even when the exact code in the body cannot unwind - let loop_block = this.cfg.start_new_block(); + // loop_block is the header. It needs to be a parallel loop header if + // this is the header to a parallel loop. + let loop_block = if tapir_loop_spawn { + this.cfg.start_new_parallel_loop_header() + } else { + this.cfg.start_new_block() + }; // Start the loop. this.cfg.goto(block, source_info, loop_block); - this.in_breakable_scope(Some(loop_block), destination, expr_span, move |this| { - // conduct the test, if necessary - let body_block = this.cfg.start_new_block(); + let exit_block = this.in_breakable_scope( + Some(loop_block), + destination, + expr_span, + move |this| { + // conduct the test, if necessary + let body_block = this.cfg.start_new_block(); + this.cfg.terminate( + loop_block, + source_info, + TerminatorKind::FalseUnwind { + real_target: body_block, + unwind: UnwindAction::Continue, + }, + ); + this.diverge_from(loop_block); + + // The “return” value of the loop body must always be a unit. We therefore + // introduce a unit temporary as the destination for the loop body. + let tmp = this.get_unit_temp(); + // Execute the body, branching back to the test. + let body_block_end = unpack!(this.expr_into_dest(tmp, body_block, body)); + this.cfg.goto(body_block_end, source_info, loop_block); + + // Loops are only exited by `break` expressions. + None + }, + ); + + if tapir_loop_spawn { + // NOTE(jhilton): when building HIR we spawn only when the yielded value is Some. + // So we know that if the loop body itself had no breaks, we won't be breaking or continuing + // from within the loop. + + // First, we need to grab the block the loop jumps to. + unpack!(block = exit_block); + let next_block = this.cfg.start_new_block(); + // Now, we need to end it with a sync. There's probably + // a more efficient way of doing this where [in_breakable_scope] uses + // a sync rather than a goto for exits, but LLVM should be + // able to remove the extra jump. this.cfg.terminate( - loop_block, + block, source_info, - TerminatorKind::FalseUnwind { - real_target: body_block, - unwind: UnwindAction::Continue, - }, + TerminatorKind::Sync { target: next_block }, ); - this.diverge_from(loop_block); - - // The “return” value of the loop body must always be a unit. We therefore - // introduce a unit temporary as the destination for the loop body. - let tmp = this.get_unit_temp(); - // Execute the body, branching back to the test. - let body_block_end = unpack!(this.expr_into_dest(tmp, body_block, body)); - this.cfg.goto(body_block_end, source_info, loop_block); - - // Loops are only exited by `break` expressions. - None - }) + next_block.unit() + } else { + // No need for a sync, just return the block. + exit_block + } } ExprKind::Call { ty: _, fun, ref args, from_hir_call, fn_span } => { let fun = unpack!(block = this.as_local_operand(block, fun)); diff --git a/compiler/rustc_mir_build/src/thir/cx/expr.rs b/compiler/rustc_mir_build/src/thir/cx/expr.rs index d648abed51634..7b9adf7d7fdfa 100644 --- a/compiler/rustc_mir_build/src/thir/cx/expr.rs +++ b/compiler/rustc_mir_build/src/thir/cx/expr.rs @@ -722,7 +722,7 @@ impl<'tcx> Cx<'tcx> { scrutinee_hir_id: discr.hir_id, arms: arms.iter().map(|a| self.convert_arm(a)).collect(), }, - hir::ExprKind::Loop(body, ..) => { + hir::ExprKind::Loop(body, _, source, ..) => { let block_ty = self.typeck_results().node_type(body.hir_id); let temp_lifetime = self .rvalue_scopes @@ -734,7 +734,13 @@ impl<'tcx> Cx<'tcx> { span: self.thir[block].span, kind: ExprKind::Block { block }, }); - ExprKind::Loop { body } + // When lowering to HIR, we already spawn only the Some(..) arm of the loop + // for the cilk_for case. This means that all we have to do is keep track + // of the origin of the loop so we can add metadata in MIR, but we don't + // have to spawn the body (when lowering to MIR we'll also sync after the + // loop). + let tapir_loop_spawn = matches!(source, hir::LoopSource::CilkFor); + ExprKind::Loop { body, tapir_loop_spawn } } hir::ExprKind::Field(source, ..) => { let mut kind = ExprKind::Field { diff --git a/compiler/rustc_mir_build/src/thir/print.rs b/compiler/rustc_mir_build/src/thir/print.rs index 575c2fbcdcdd5..89632ddca66ff 100644 --- a/compiler/rustc_mir_build/src/thir/print.rs +++ b/compiler/rustc_mir_build/src/thir/print.rs @@ -297,7 +297,7 @@ impl<'a, 'tcx> ThirPrinter<'a, 'tcx> { self.print_expr(*source, depth_lvl + 2); print_indented!(self, "}", depth_lvl); } - Loop { body } => { + Loop { body, tapir_loop_spawn: _ } => { print_indented!(self, "Loop (", depth_lvl); print_indented!(self, "body:", depth_lvl + 1); self.print_expr(*body, depth_lvl + 2); diff --git a/compiler/rustc_mir_dataflow/src/elaborate_drops.rs b/compiler/rustc_mir_dataflow/src/elaborate_drops.rs index 1b2f2cd9477fc..aa81266128803 100644 --- a/compiler/rustc_mir_dataflow/src/elaborate_drops.rs +++ b/compiler/rustc_mir_dataflow/src/elaborate_drops.rs @@ -450,6 +450,7 @@ where kind: TerminatorKind::Unreachable, }), is_cleanup: self.unwind.is_cleanup(), + is_parallel_loop_header: false, }); } @@ -617,6 +618,7 @@ where }, }), is_cleanup: unwind.is_cleanup(), + is_parallel_loop_header: false, }; let switch_block = self.elaborator.patch().new_block(switch_block); self.drop_flag_test_block(switch_block, succ, unwind) @@ -667,6 +669,7 @@ where source_info: self.source_info, }), is_cleanup: unwind.is_cleanup(), + is_parallel_loop_header: false, }; let destructor_block = self.elaborator.patch().new_block(result); @@ -717,6 +720,7 @@ where ), ], is_cleanup: unwind.is_cleanup(), + is_parallel_loop_header: false, terminator: Some(Terminator { source_info: self.source_info, // this gets overwritten by drop elaboration. @@ -731,6 +735,7 @@ where Rvalue::BinaryOp(BinOp::Eq, Box::new((copy(Place::from(cur)), copy(len.into())))), )], is_cleanup: unwind.is_cleanup(), + is_parallel_loop_header: false, terminator: Some(Terminator { source_info: self.source_info, kind: TerminatorKind::if_(move_(can_go), succ, drop_block), @@ -837,6 +842,7 @@ where self.assign(cur.into(), Rvalue::Use(zero)), ], is_cleanup: unwind.is_cleanup(), + is_parallel_loop_header: false, terminator: Some(Terminator { source_info: self.source_info, kind: TerminatorKind::Goto { target: loop_block }, @@ -968,6 +974,7 @@ where statements: vec![], terminator: Some(Terminator { source_info: self.source_info, kind: k }), is_cleanup: unwind.is_cleanup(), + is_parallel_loop_header: false, }) } diff --git a/compiler/rustc_mir_transform/src/add_call_guards.rs b/compiler/rustc_mir_transform/src/add_call_guards.rs index a47c8d94bba22..5e0c185126e56 100644 --- a/compiler/rustc_mir_transform/src/add_call_guards.rs +++ b/compiler/rustc_mir_transform/src/add_call_guards.rs @@ -65,6 +65,7 @@ impl AddCallGuards { source_info, kind: TerminatorKind::Goto { target: *destination }, }), + is_parallel_loop_header: false, }; // Get the index it will be when inserted into the MIR diff --git a/compiler/rustc_mir_transform/src/add_moves_for_packed_drops.rs b/compiler/rustc_mir_transform/src/add_moves_for_packed_drops.rs index de6d20ae3e807..577ee8ede3e12 100644 --- a/compiler/rustc_mir_transform/src/add_moves_for_packed_drops.rs +++ b/compiler/rustc_mir_transform/src/add_moves_for_packed_drops.rs @@ -91,6 +91,7 @@ fn add_move_for_packed_drop<'tcx>( statements: vec![Statement { source_info, kind: StatementKind::StorageDead(temp) }], terminator: Some(Terminator { source_info, kind: TerminatorKind::Goto { target } }), is_cleanup, + is_parallel_loop_header: false, }); patch.add_statement(loc, StatementKind::StorageLive(temp)); diff --git a/compiler/rustc_mir_transform/src/automatic_sync_insertion.rs b/compiler/rustc_mir_transform/src/automatic_sync_insertion.rs index 3864ec793bc38..9ab48213c637e 100644 --- a/compiler/rustc_mir_transform/src/automatic_sync_insertion.rs +++ b/compiler/rustc_mir_transform/src/automatic_sync_insertion.rs @@ -36,6 +36,7 @@ impl<'tcx> MirPass<'tcx> for InsertSyncs { kind: mir::TerminatorKind::Return, }), is_cleanup: false, + is_parallel_loop_header: false, }; let target = new_blocks.as_mut().push(return_block); let new_bb_data = diff --git a/compiler/rustc_mir_transform/src/check_alignment.rs b/compiler/rustc_mir_transform/src/check_alignment.rs index 9eec724ef21b9..547b178ab50fb 100644 --- a/compiler/rustc_mir_transform/src/check_alignment.rs +++ b/compiler/rustc_mir_transform/src/check_alignment.rs @@ -142,6 +142,11 @@ fn split_block( statements: block_data.statements.split_off(location.statement_index), terminator: block_data.terminator.take(), is_cleanup: block_data.is_cleanup, + // This is false because in this context, split_block is used in ways where both the first and second basic + // block are relevant. As a result, the first basic block (if it was initially the loop header) is still the + // loop header, especially since none of the back-edges are redirected here (even if they were, + // is_parallel_loop_header should be changed in that code). + is_parallel_loop_header: false, }; basic_blocks.push(new_block) diff --git a/compiler/rustc_mir_transform/src/coroutine.rs b/compiler/rustc_mir_transform/src/coroutine.rs index ea2fa047ddae1..8634fdf644ca3 100644 --- a/compiler/rustc_mir_transform/src/coroutine.rs +++ b/compiler/rustc_mir_transform/src/coroutine.rs @@ -303,6 +303,7 @@ impl<'tcx> TransformVisitor<'tcx> { statements, terminator: Some(Terminator { source_info, kind: TerminatorKind::Return }), is_cleanup: false, + is_parallel_loop_header: false, }); block @@ -1161,6 +1162,7 @@ fn insert_switch<'tcx>( statements: vec![assign], terminator: Some(Terminator { source_info, kind: switch }), is_cleanup: false, + is_parallel_loop_header: false, }, ); @@ -1294,6 +1296,7 @@ fn insert_term_block<'tcx>(body: &mut Body<'tcx>, kind: TerminatorKind<'tcx>) -> statements: Vec::new(), terminator: Some(Terminator { source_info, kind }), is_cleanup: false, + is_parallel_loop_header: false, }) } @@ -1320,6 +1323,7 @@ fn insert_panic_block<'tcx>( statements: Vec::new(), terminator: Some(Terminator { source_info, kind: term }), is_cleanup: false, + is_parallel_loop_header: false, }); assert_block @@ -1401,6 +1405,7 @@ fn create_coroutine_resume_function<'tcx>( statements: vec![transform.set_discr(VariantIdx::new(POISONED), source_info)], terminator: Some(Terminator { source_info, kind: TerminatorKind::UnwindResume }), is_cleanup: true, + is_parallel_loop_header: false, }); for (idx, block) in body.basic_blocks_mut().iter_enumerated_mut() { @@ -1492,6 +1497,7 @@ fn insert_clean_drop(body: &mut Body<'_>) -> BasicBlock { statements: Vec::new(), terminator: Some(Terminator { source_info, kind: term }), is_cleanup: false, + is_parallel_loop_header: false, }) } @@ -1558,6 +1564,7 @@ fn create_cases<'tcx>( kind: TerminatorKind::Goto { target }, }), is_cleanup: false, + is_parallel_loop_header: false, }); (point.state, block) diff --git a/compiler/rustc_mir_transform/src/coverage/mod.rs b/compiler/rustc_mir_transform/src/coverage/mod.rs index 4c5be0a3f4bed..c0747502692df 100644 --- a/compiler/rustc_mir_transform/src/coverage/mod.rs +++ b/compiler/rustc_mir_transform/src/coverage/mod.rs @@ -214,6 +214,7 @@ fn inject_edge_counter_basic_block( kind: TerminatorKind::Goto { target: to_bb }, }), is_cleanup: false, + is_parallel_loop_header: false, }); let edge_ref = mir_body[from_bb] .terminator_mut() diff --git a/compiler/rustc_mir_transform/src/promote_consts.rs b/compiler/rustc_mir_transform/src/promote_consts.rs index 577b8f2080fcd..6fd1524e59050 100644 --- a/compiler/rustc_mir_transform/src/promote_consts.rs +++ b/compiler/rustc_mir_transform/src/promote_consts.rs @@ -641,6 +641,7 @@ impl<'a, 'tcx> Promoter<'a, 'tcx> { kind: TerminatorKind::Return, }), is_cleanup: false, + is_parallel_loop_header: false, }) } diff --git a/compiler/rustc_mir_transform/src/shim.rs b/compiler/rustc_mir_transform/src/shim.rs index 75613a2c55555..c00a26eca17db 100644 --- a/compiler/rustc_mir_transform/src/shim.rs +++ b/compiler/rustc_mir_transform/src/shim.rs @@ -258,6 +258,7 @@ fn build_drop_shim<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId, ty: Option>) statements: vec![], terminator: Some(Terminator { source_info, kind }), is_cleanup: false, + is_parallel_loop_header: false, }) }; block(&mut blocks, TerminatorKind::Goto { target: return_block }); @@ -432,6 +433,7 @@ fn build_thread_local_shim<'tcx>(tcx: TyCtxt<'tcx>, instance: ty::InstanceDef<'t }], terminator: Some(Terminator { source_info, kind: TerminatorKind::Return }), is_cleanup: false, + is_parallel_loop_header: false, }); new_body( @@ -517,6 +519,7 @@ impl<'tcx> CloneShimBuilder<'tcx> { statements, terminator: Some(Terminator { source_info, kind }), is_cleanup, + is_parallel_loop_header: false, }) } @@ -869,6 +872,7 @@ fn build_call_shim<'tcx>( statements, terminator: Some(Terminator { source_info, kind }), is_cleanup, + is_parallel_loop_header: false, }) }; @@ -992,6 +996,7 @@ pub fn build_adt_ctor(tcx: TyCtxt<'_>, ctor_id: DefId) -> Body<'_> { statements: vec![statement], terminator: Some(Terminator { source_info, kind: TerminatorKind::Return }), is_cleanup: false, + is_parallel_loop_header: false, }; let source = MirSource::item(ctor_id); @@ -1039,6 +1044,7 @@ fn build_fn_ptr_addr_shim<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId, self_ty: Ty<'t statements, terminator: Some(Terminator { source_info, kind: TerminatorKind::Return }), is_cleanup: false, + is_parallel_loop_header: false, }; let source = MirSource::from_instance(ty::InstanceDef::FnPtrAddrShim(def_id, self_ty)); new_body(source, IndexVec::from_elem_n(start_block, 1), locals, sig.inputs().len(), span) @@ -1104,6 +1110,7 @@ fn build_construct_coroutine_by_move_shim<'tcx>( statements, terminator: Some(Terminator { source_info, kind: TerminatorKind::Return }), is_cleanup: false, + is_parallel_loop_header: false, }; let source = MirSource::from_instance(ty::InstanceDef::ConstructCoroutineInClosureShim { diff --git a/compiler/rustc_mir_transform/src/simplify.rs b/compiler/rustc_mir_transform/src/simplify.rs index 8c8818bd68e6f..b21058d3ecdb4 100644 --- a/compiler/rustc_mir_transform/src/simplify.rs +++ b/compiler/rustc_mir_transform/src/simplify.rs @@ -74,6 +74,7 @@ impl<'tcx> MirPass<'tcx> for SimplifyCfg { self.name() } + #[instrument(level = "debug", skip_all, fields(name = self.name()))] fn run_pass(&self, _: TyCtxt<'tcx>, body: &mut Body<'tcx>) { debug!("SimplifyCfg({:?}) - simplifying {:?}", self.name(), body.source); simplify_cfg(body); @@ -106,6 +107,7 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> { CfgSimplifier { basic_blocks, pred_count } } + #[instrument(level = "debug", skip(self))] pub fn simplify(mut self) { self.strip_nops(); @@ -127,8 +129,41 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> { let mut terminator = self.basic_blocks[bb].terminator.take().expect("invalid terminator state"); + debug!("terminator of {:?} = {:?}", bb, terminator.kind); + for successor in terminator.successors_mut() { + let old_successor = *successor; self.collapse_goto_chain(successor, &mut changed); + debug!( + "successor before and after collapse: {:?} and {:?}", + old_successor, successor + ); + + // If the old successor was a parallel loop header + // and has no more predecessors we should say that + // the new successor is a parallel loop header. + // This seems correct since whenever there's a goto chain + // from the old successor to the new successor, the successor + // changes. Then, it must be equivalent to jump to any point in + // the goto chain or to jump to the beginning. + // Therefore if the old + // successor is a parallel loop header the new successor should + // be as well. + // NOTE(jhilton): we shouldn't actually need the condition that + // self.pred_count[old_successor] == 0 since we should instead observe + // that all calls to collapse_goto_chain behave similarly on the same + // initial successor. + + let should_make_successor_parallel_loop_header = *successor != old_successor + && self.pred_count[old_successor] == 0 + && self.basic_blocks[old_successor].is_parallel_loop_header; + if should_make_successor_parallel_loop_header { + debug!( + "making {:?} a new parallel loop header from {:?}", + *successor, old_successor + ); + self.basic_blocks[*successor].is_parallel_loop_header = true; + } } let mut inner_changed = true; @@ -140,6 +175,7 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> { changed |= inner_changed; } + debug!(?merged_blocks); let statements_to_merge = merged_blocks.iter().map(|&i| self.basic_blocks[i].statements.len()).sum(); @@ -165,6 +201,7 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> { /// * the block has statements /// * the block has a terminator other than `goto` /// * the block has no terminator (meaning some other part of the current optimization stole it) + #[instrument(level = "debug", skip(self))] fn take_terminator_if_simple_goto(&mut self, bb: BasicBlock) -> Option> { match self.basic_blocks[bb] { BasicBlockData { @@ -180,6 +217,7 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> { } /// Collapse a goto chain starting from `start` + #[instrument(level = "debug", skip(self))] fn collapse_goto_chain(&mut self, start: &mut BasicBlock, changed: &mut bool) { // Using `SmallVec` here, because in some logs on libcore oli-obk saw many single-element // goto chains. We should probably benchmark different sizes. @@ -189,6 +227,7 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> { let Terminator { kind: TerminatorKind::Goto { target }, .. } = terminator else { unreachable!(); }; + debug!("found {:?} with terminator {:?}", current, terminator.kind); terminators.push((current, terminator)); current = target; } @@ -216,6 +255,7 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> { } // merge a block with 1 `goto` predecessor to its parent + #[instrument(level = "debug", skip_all)] fn merge_successor( &mut self, merged_blocks: &mut Vec, diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index dcbe3f146fe9d..d324e367e23b0 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1457,6 +1457,8 @@ impl<'a> Parser<'a> { assert!(this.eat_keyword(kw::For)); this.parse_expr_for(None, this.prev_token.span) } + } else if this.eat_keyword(kw::CilkFor) { + this.parse_expr_cilk_for(None, this.prev_token.span) } else if this.eat_keyword(kw::While) { this.parse_expr_while(None, this.prev_token.span) } else if let Some(label) = this.eat_label() { @@ -2793,16 +2795,42 @@ impl<'a> Parser<'a> { Ok((pat, expr)) } + // Parses `cilk_for in ` (`cilk_for` token already eaten). + fn parse_expr_cilk_for(&mut self, opt_label: Option