From dfef2e96fe0216e243f458b8fe04a8b82a07065a Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Tue, 9 Dec 2025 07:50:30 -0800 Subject: [PATCH 1/3] Remove the need to call clang for std::offload usages --- compiler/rustc_codegen_llvm/src/back/write.rs | 74 ++++++++++++++++++- compiler/rustc_codegen_llvm/src/base.rs | 6 +- compiler/rustc_codegen_llvm/src/intrinsic.rs | 8 +- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 22 +++++- compiler/rustc_interface/src/tests.rs | 2 +- .../rustc_llvm/llvm-wrapper/RustWrapper.cpp | 32 ++++++-- compiler/rustc_session/src/config.rs | 12 +-- compiler/rustc_session/src/options.rs | 23 +++++- 8 files changed, 149 insertions(+), 30 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index e8da7f68136dd..a649ee4bff1cc 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -703,10 +703,9 @@ pub(crate) unsafe fn llvm_optimize( llvm::set_value_name(new_fn, &name); } - if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) { + if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Device) { let cx = SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size); - for func in cx.get_functions() { let offload_kernel = "offload-kernel"; if attributes::has_string_attr(func, offload_kernel) { @@ -775,12 +774,79 @@ pub(crate) unsafe fn llvm_optimize( ) }; - if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) { + if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Device) { + let device_path = cgcx.output_filenames.path(OutputType::Object); + let device_dir = device_path.parent().unwrap(); + let device_out = device_dir.join("host.out"); + let device_out_c = path_to_c_string(device_out.as_path()); unsafe { - llvm::LLVMRustBundleImages(module.module_llvm.llmod(), module.module_llvm.tm.raw()); + // 1) Bundle 
device module into offload image host.out (device TM) + let ok = llvm::LLVMRustBundleImages( + module.module_llvm.llmod(), + module.module_llvm.tm.raw(), + device_out_c.as_ptr(), + ); + assert!(ok, "LLVMRustBundleImages (device -> host.out) failed"); + if !device_out.exists() { + panic!("BundleImages failed, `host.out` was not created!"); + } } } + // This assumes that we previously compiled our kernels for a gpu target, which created a + // `host.out` artifact. The user is supposed to provide us with a path to this artifact, we + // don't need any other artifacts from the previous run. We will embed this artifact into our + // LLVM-IR host module, to create a `host.o` ObjectFile, which we will write to disk. + // The last, not yet automated steps uses the `clang-linker-wrapper` to process `host.o`. + if !cgcx.target_is_like_gpu { + if let Some(device_path) = config + .offload + .iter() + .find_map(|o| if let config::Offload::Host(path) = o { Some(path) } else { None }) + { + let device_pathbuf = PathBuf::from(device_path); + if device_pathbuf.is_relative() { + panic!("Absolute path is needed"); + } else if device_pathbuf + .file_name() + .and_then(|n| n.to_str()) + .is_some_and(|n| n != "host.out") + { + panic!("Need path to the host.out file"); + } + assert!(device_pathbuf.exists()); + let host_path = cgcx.output_filenames.path(OutputType::Object); + let host_dir = host_path.parent().unwrap(); + let out_obj = host_dir.join("host.o"); + let host_out_c = path_to_c_string(device_pathbuf.as_path()); + + // 2) Finalize host: lib.bc + host.out -> host.o (host TM) + // We create a full clone of our LLVM host module, since we will embed the device IR + // into it, and this might break caching or incremental compilation otherwise. 
+ let llmod2 = llvm::LLVMCloneModule(module.module_llvm.llmod()); + let ok = + unsafe { llvm::LLVMRustOffloadEmbedBufferInModule(llmod2, host_out_c.as_ptr()) }; + assert!(ok, "LLVMRustOffloadEmbedBufferInModule failed"); + write_output_file( + dcx, + module.module_llvm.tm.raw(), + config.no_builtins, + llmod2, + &out_obj, + None, + llvm::FileType::ObjectFile, + &cgcx.prof, + true, + ); + if !out_obj.exists() { + dbg!("{:?} does not exist!", out_obj); + panic!("FinalizeOffload failed!"); + } + // We ignore cgcx.save_temps here and unconditionally always keep our `host.out` artifact. + // Otherwise, recompiling the host code would fail since we deleted that device artifact + // in the previous host compilation, which would be confusing at best. + } + } result.into_result().unwrap_or_else(|()| llvm_err(dcx, LlvmError::RunLlvmPasses)) } diff --git a/compiler/rustc_codegen_llvm/src/base.rs b/compiler/rustc_codegen_llvm/src/base.rs index 16455b4c79cd6..388118f9b4f17 100644 --- a/compiler/rustc_codegen_llvm/src/base.rs +++ b/compiler/rustc_codegen_llvm/src/base.rs @@ -93,9 +93,9 @@ pub(crate) fn compile_codegen_unit( // They are necessary for correct offload execution. We do this here to simplify the // `offload` intrinsic, avoiding the need for tracking whether it's the first // intrinsic call or not. 
- if cx.sess().opts.unstable_opts.offload.contains(&Offload::Enable) - && !cx.sess().target.is_like_gpu - { + let has_host_offload = + cx.sess().opts.unstable_opts.offload.iter().any(|o| matches!(o, Offload::Host(_))); + if has_host_offload && !cx.sess().target.is_like_gpu { cx.offload_globals.replace(Some(OffloadGlobals::declare(&cx))); } diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index 3bc890310cc87..f3d9192074777 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -202,13 +202,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { return Ok(()); } sym::offload => { - if !tcx - .sess - .opts - .unstable_opts - .offload - .contains(&rustc_session::config::Offload::Enable) - { + if tcx.sess.opts.unstable_opts.offload.is_empty() { let _ = tcx.dcx().emit_almost_fatal(OffloadWithoutEnable); } diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index a86b4cc389158..75b3e5955b78f 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1666,7 +1666,15 @@ mod Offload { use super::*; unsafe extern "C" { /// Processes the module and writes it in an offload compatible way into a "host.out" file. - pub(crate) fn LLVMRustBundleImages<'a>(M: &'a Module, TM: &'a TargetMachine) -> bool; + pub(crate) fn LLVMRustBundleImages<'a>( + M: &'a Module, + TM: &'a TargetMachine, + host_out: *const c_char, + ) -> bool; + pub(crate) unsafe fn LLVMRustOffloadEmbedBufferInModule<'a>( + _M: &'a Module, + _host_out: *const c_char, + ) -> bool; pub(crate) fn LLVMRustOffloadMapper<'a>(OldFn: &'a Value, NewFn: &'a Value); } } @@ -1680,7 +1688,17 @@ mod Offload_fallback { /// Processes the module and writes it in an offload compatible way into a "host.out" file. /// Marked as unsafe to match the real offload wrapper which is unsafe due to FFI. 
#[allow(unused_unsafe)] - pub(crate) unsafe fn LLVMRustBundleImages<'a>(_M: &'a Module, _TM: &'a TargetMachine) -> bool { + pub(crate) unsafe fn LLVMRustBundleImages<'a>( + _M: &'a Module, + _TM: &'a TargetMachine, + _host_out: *const c_char, + ) -> bool { + unimplemented!("This rustc version was not built with LLVM Offload support!"); + } + pub(crate) unsafe fn LLVMRustOffloadEmbedBufferInModule<'a>( + _M: &'a Module, + _host_out: *const c_char, + ) -> bool { unimplemented!("This rustc version was not built with LLVM Offload support!"); } #[allow(unused_unsafe)] diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index 8dab3a7f37f59..d075f94ef8502 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -837,7 +837,7 @@ fn test_unstable_options_tracking_hash() { tracked!(no_profiler_runtime, true); tracked!(no_trait_vptr, true); tracked!(no_unique_section_names, true); - tracked!(offload, vec![Offload::Enable]); + tracked!(offload, vec![Offload::Device]); tracked!(on_broken_pipe, OnBrokenPipe::Kill); tracked!(osx_rpath_install_name, true); tracked!(packed_bundled_libs, true); diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index 0720af0eb7e0c..02e6abf24627f 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -43,8 +43,10 @@ // available. As such, we only try to build it in the first place, if // llvm.offload is enabled. 
#ifdef OFFLOAD +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Object/OffloadBinary.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" #endif // for raw `write` in the bad-alloc handler @@ -174,12 +176,13 @@ static Error writeFile(StringRef Filename, StringRef Data) { // --image=file=device.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp // The input module is the rust code compiled for a gpu target like amdgpu. // Based on clang/tools/clang-offload-packager/ClangOffloadPackager.cpp -extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) { +extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM, + const char *HostOutPath) { std::string Storage; llvm::raw_string_ostream OS1(Storage); llvm::WriteBitcodeToFile(*unwrap(M), OS1); OS1.flush(); - auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "module.bc"); + auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "device.bc"); SmallVector BinaryData; raw_svector_ostream OS2(BinaryData); @@ -188,19 +191,38 @@ extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) { ImageBinary.TheImageKind = object::IMG_Bitcode; ImageBinary.Image = std::move(MB); ImageBinary.TheOffloadKind = object::OFK_OpenMP; - ImageBinary.StringData["triple"] = TM.getTargetTriple().str(); - ImageBinary.StringData["arch"] = TM.getTargetCPU(); + + std::string TripleStr = TM.getTargetTriple().str(); + llvm::StringRef CPURef = TM.getTargetCPU(); + ImageBinary.StringData["triple"] = TripleStr; + ImageBinary.StringData["arch"] = CPURef; llvm::SmallString<0> Buffer = OffloadBinary::write(ImageBinary); if (Buffer.size() % OffloadBinary::getAlignment() != 0) // Offload binary has invalid size alignment return false; OS2 << Buffer; - if (Error E = writeFile("host.out", + if (Error E = writeFile(HostOutPath, StringRef(BinaryData.begin(), BinaryData.size()))) return false; return true; } +extern "C" bool LLVMRustOffloadEmbedBufferInModule(LLVMModuleRef 
HostM, + const char *HostOutPath) { + auto MBOrErr = MemoryBuffer::getFile(HostOutPath); + if (!MBOrErr) { + auto E = MBOrErr.getError(); + auto _B = errorCodeToError(E); + return false; + } + MemoryBufferRef Buf = (*MBOrErr)->getMemBufferRef(); + Module *M = unwrap(HostM); + StringRef SectionName = ".llvm.offloading"; + Align Alignment = Align(8); + llvm::embedBufferInModule(*M, Buf, SectionName, Alignment); + return true; +} + extern "C" void LLVMRustOffloadMapper(LLVMValueRef OldFn, LLVMValueRef NewFn) { llvm::Function *oldFn = llvm::unwrap(OldFn); llvm::Function *newFn = llvm::unwrap(NewFn); diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index a3a97dfec61dc..2774333573f6b 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -190,10 +190,12 @@ pub enum CoverageLevel { } // The different settings that the `-Z offload` flag can have. -#[derive(Clone, Copy, PartialEq, Hash, Debug)] +#[derive(Clone, PartialEq, Hash, Debug)] pub enum Offload { - /// Enable the llvm offload pipeline - Enable, + /// Entry point for `std::offload`, enables kernel compilation for a gpu device + Device, + /// Second step in the offload pipeline, generates the host code to call kernels. + Host(String), } /// The different settings that the `-Z autodiff` flag can have. 
@@ -2578,9 +2580,7 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M ) } - if !nightly_options::is_unstable_enabled(matches) - && unstable_opts.offload.contains(&Offload::Enable) - { + if !nightly_options::is_unstable_enabled(matches) && !unstable_opts.offload.is_empty() { early_dcx.early_fatal( "`-Zoffload=Enable` also requires `-Zunstable-options` \ and a nightly compiler", diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index aea0b73ee9277..2b83d1225c97f 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1451,8 +1451,27 @@ pub mod parse { let mut v: Vec<&str> = v.split(",").collect(); v.sort_unstable(); for &val in v.iter() { - let variant = match val { - "Enable" => Offload::Enable, + // Split each entry on '=' if it has an argument + let (key, arg) = match val.split_once('=') { + Some((k, a)) => (k, Some(a)), + None => (val, None), + }; + + let variant = match key { + "Host" => { + if let Some(p) = arg { + Offload::Host(p.to_string()) + } else { + return false; + } + } + "Device" => { + if let Some(_) = arg { + // Device does not accept a value + return false; + } + Offload::Device + } _ => { // FIXME(ZuseZ4): print an error saying which value is not recognized return false; From 3fdc6da2aad13909f02754d32db85ac69ca86102 Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Tue, 9 Dec 2025 07:51:04 -0800 Subject: [PATCH 2/3] adding proper error handling for offload --- compiler/rustc_codegen_llvm/messages.ftl | 7 ++++++- compiler/rustc_codegen_llvm/src/back/write.rs | 20 +++++++++---------- compiler/rustc_codegen_llvm/src/errors.rs | 20 +++++++++++++++++++ 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/compiler/rustc_codegen_llvm/messages.ftl b/compiler/rustc_codegen_llvm/messages.ftl index b3ef9840f5dc2..a637ae8184b4f 100644 --- a/compiler/rustc_codegen_llvm/messages.ftl +++ b/compiler/rustc_codegen_llvm/messages.ftl @@ 
-19,7 +19,12 @@ codegen_llvm_lto_bitcode_from_rlib = failed to get bitcode from object file for codegen_llvm_mismatch_data_layout = data-layout for target `{$rustc_target}`, `{$rustc_layout}`, differs from LLVM target's `{$llvm_target}` default layout, `{$llvm_layout}` -codegen_llvm_offload_without_enable = using the offload feature requires -Z offload=Enable +codegen_llvm_offload_bundleimages_failed = call to BundleImages failed, `host.out` was not created +codegen_llvm_offload_embed_failed = call to EmbedBufferInModule failed, `host.o` was not created +codegen_llvm_offload_no_abs_path = using the `-Z offload=Host=/absolute/path/to/host.out` flag requires an absolute path +codegen_llvm_offload_no_host_out = using the `-Z offload=Host=/absolute/path/to/host.out` flag must point to a `host.out` file +codegen_llvm_offload_nonexisting = the given path/file to `host.out` does not exist. Did you forget to run the device compilation first? +codegen_llvm_offload_without_enable = using the offload feature requires `-Z offload=Device` or `-Z offload=Host=/absolute/path/to/host.out` codegen_llvm_offload_without_fat_lto = using the offload feature requires -C lto=fat codegen_llvm_parse_bitcode = failed to parse bitcode for LTO module diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index a649ee4bff1cc..d87de8b384677 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -786,9 +786,8 @@ pub(crate) unsafe fn llvm_optimize( module.module_llvm.tm.raw(), device_out_c.as_ptr(), ); - assert!(ok, "LLVMRustBundleImages (device -> host.out) failed"); - if !device_out.exists() { - panic!("BundleImages failed, `host.out` was not created!"); + if !ok || !device_out.exists() { + dcx.emit_err(crate::errors::OffloadBundleImagesFailed); } } } @@ -806,15 +805,16 @@ pub(crate) unsafe fn llvm_optimize( { let device_pathbuf = PathBuf::from(device_path); if device_pathbuf.is_relative() { - panic!("Absolute path is needed"); + 
dcx.emit_err(crate::errors::OffloadWithoutAbsPath); } else if device_pathbuf .file_name() .and_then(|n| n.to_str()) .is_some_and(|n| n != "host.out") { - panic!("Need path to the host.out file"); + dcx.emit_err(crate::errors::OffloadWrongFileName); + } else if !device_pathbuf.exists() { + dcx.emit_err(crate::errors::OffloadNonexistingPath); } - assert!(device_pathbuf.exists()); let host_path = cgcx.output_filenames.path(OutputType::Object); let host_dir = host_path.parent().unwrap(); let out_obj = host_dir.join("host.o"); @@ -826,7 +826,9 @@ pub(crate) unsafe fn llvm_optimize( let llmod2 = llvm::LLVMCloneModule(module.module_llvm.llmod()); let ok = unsafe { llvm::LLVMRustOffloadEmbedBufferInModule(llmod2, host_out_c.as_ptr()) }; - assert!(ok, "LLVMRustOffloadEmbedBufferInModule failed"); + if !ok { + dcx.emit_err(crate::errors::OffloadEmbedFailed); + } write_output_file( dcx, module.module_llvm.tm.raw(), @@ -838,10 +840,6 @@ pub(crate) unsafe fn llvm_optimize( &cgcx.prof, true, ); - if !out_obj.exists() { - dbg!("{:?} does not exist!", out_obj); - panic!("FinalizeOffload failed!"); - } // We ignore cgcx.save_temps here and unconditionally always keep our `host.out` artifact. // Otherwise, recompiling the host code would fail since we deleted that device artifact // in the previous host compilation, which would be confusing at best. 
diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs index b59067b9745b8..c73140e041b60 100644 --- a/compiler/rustc_codegen_llvm/src/errors.rs +++ b/compiler/rustc_codegen_llvm/src/errors.rs @@ -52,6 +52,26 @@ pub(crate) struct OffloadWithoutEnable; #[diag(codegen_llvm_offload_without_fat_lto)] pub(crate) struct OffloadWithoutFatLTO; +#[derive(Diagnostic)] +#[diag(codegen_llvm_offload_no_abs_path)] +pub(crate) struct OffloadWithoutAbsPath; + +#[derive(Diagnostic)] +#[diag(codegen_llvm_offload_no_host_out)] +pub(crate) struct OffloadWrongFileName; + +#[derive(Diagnostic)] +#[diag(codegen_llvm_offload_nonexisting)] +pub(crate) struct OffloadNonexistingPath; + +#[derive(Diagnostic)] +#[diag(codegen_llvm_offload_bundleimages_failed)] +pub(crate) struct OffloadBundleImagesFailed; + +#[derive(Diagnostic)] +#[diag(codegen_llvm_offload_embed_failed)] +pub(crate) struct OffloadEmbedFailed; + #[derive(Diagnostic)] #[diag(codegen_llvm_lto_bitcode_from_rlib)] pub(crate) struct LtoBitcodeFromRlib { From 8e1d80305fd98ee7da1cb422d5da2495502e995d Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Tue, 9 Dec 2025 07:50:48 -0800 Subject: [PATCH 3/3] Update offloading docs to account for simplified usage --- src/doc/rustc-dev-guide/src/offload/usage.md | 23 +++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/doc/rustc-dev-guide/src/offload/usage.md b/src/doc/rustc-dev-guide/src/offload/usage.md index d934de9049bea..062534a4b6556 100644 --- a/src/doc/rustc-dev-guide/src/offload/usage.md +++ b/src/doc/rustc-dev-guide/src/offload/usage.md @@ -77,28 +77,25 @@ pub extern "gpu-kernel" fn kernel_1(x: *mut [f64; 256]) { ## Compile instructions It is important to use a clang compiler build on the same llvm as rustc. Just calling clang without the full path will likely use your system clang, which probably will be incompatible. 
So either substitute clang/lld invocations below with absolute path, or set your `PATH` accordingly. -First we generate the host (cpu) code. The first build is just to compile libc, take note of the hashed path. Then we call rustc directly to build our host code, while providing the libc artifact to rustc. +First we generate the device (gpu) code. Replace the target-cpu with the right code for your gpu. ``` -cargo +offload build -r -v -rustc +offload --edition 2024 src/lib.rs -g --crate-type cdylib -C opt-level=3 -C panic=abort -C lto=fat -L dependency=/absolute_path_to/target/release/deps --extern libc=/absolute_path_to/target/release/deps/liblibc-.rlib --emit=llvm-bc,llvm-ir -Zoffload=Enable -Zunstable-options +RUSTFLAGS="-Ctarget-cpu=gfx90a --emit=llvm-bc,llvm-ir -Zoffload=Device -Csave-temps -Zunstable-options" cargo +offload build -Zunstable-options -r -v --target amdgcn-amd-amdhsa -Zbuild-std=core ``` +You might afterwards need to copy your target/release/deps/LIB_NAME.bc to lib.bc for now, before the next step. -Now we generate the device code. Replace the target-cpu with the right code for your gpu. +Now we generate the host (cpu) code. ``` -RUSTFLAGS="-Ctarget-cpu=gfx90a --emit=llvm-bc,llvm-ir -Zoffload=Enable -Zunstable-options" cargo +offload build -Zunstable-options -r -v --target amdgcn-amd-amdhsa -Zbuild-std=core +RUSTFLAGS="--emit=llvm-bc,llvm-ir -Csave-temps -Zoffload=Host=/absolute/path/to/target/amdgcn-amd-amdhsa/release/deps/host.out -Zunstable-options" cargo +offload build -r ``` - +This call also does a lot of work and generates multiple intermediate files for llvm offload. 
+While we integrated most offload steps into rustc by now, one binary invocation still remains for now: ``` -"clang-21" "-cc1" "-triple" "x86_64-unknown-linux-gnu" "-S" "-save-temps=cwd" "-disable-free" "-clear-ast-before-backend" "-main-file-name" "lib.rs" "-mrelocation-model" "pic" "-pic-level" "2" "-pic-is-pie" "-mframe-pointer=all" "-fmath-errno" "-ffp-contract=on" "-fno-rounding-math" "-mconstructor-aliases" "-funwind-tables=2" "-target-cpu" "x86-64" "-tune-cpu" "generic" "-resource-dir" "//rust/build/x86_64-unknown-linux-gnu/llvm/lib/clang/21" "-ferror-limit" "19" "-fopenmp" "-fopenmp-offload-mandatory" "-fgnuc-version=4.2.1" "-fskip-odr-check-in-gmf" "-fembed-offload-object=host.out" "-fopenmp-targets=amdgcn-amd-amdhsa" "-faddrsig" "-D__GCC_HAVE_DWARF2_CFI_ASM=1" "-o" "host.s" "-x" "ir" "lib.bc" - -"clang-21" "-cc1as" "-triple" "x86_64-unknown-linux-gnu" "-filetype" "obj" "-main-file-name" "lib.rs" "-target-cpu" "x86-64" "-mrelocation-model" "pic" "-o" "host.o" "host.s" - -"clang-linker-wrapper" "--should-extract=gfx90a" "--device-compiler=amdgcn-amd-amdhsa=-g" "--device-compiler=amdgcn-amd-amdhsa=-save-temps=cwd" "--device-linker=amdgcn-amd-amdhsa=-lompdevice" "--host-triple=x86_64-unknown-linux-gnu" "--save-temps" "--linker-path=/ABSOlUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/lld/bin/ld.lld" "--hash-style=gnu" "--eh-frame-hdr" "-m" "elf_x86_64" "-pie" "-dynamic-linker" "/lib64/ld-linux-x86-64.so.2" "-o" "bare" "/lib/../lib64/Scrt1.o" "/lib/../lib64/crti.o" "/ABSOLUTE_PATH_TO/crtbeginS.o" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/bin/../lib/x86_64-unknown-linux-gnu" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib/clang/21/lib/x86_64-unknown-linux-gnu" "-L/lib/../lib64" "-L/usr/lib64" "-L/lib" "-L/usr/lib" "host.o" "-lstdc++" "-lm" "-lomp" "-lomptarget" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib" "-lgcc_s" "-lgcc" "-lpthread" "-lc" "-lgcc_s" "-lgcc" "/ABSOLUTE_PATH_TO/crtendS.o" 
"/lib/../lib64/crtn.o" +"clang-linker-wrapper" "--should-extract=gfx90a" "--device-compiler=amdgcn-amd-amdhsa=-g" "--device-compiler=amdgcn-amd-amdhsa=-save-temps=cwd" "--device-linker=amdgcn-amd-amdhsa=-lompdevice" "--host-triple=x86_64-unknown-linux-gnu" "--save-temps" "--linker-path=/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/lld/bin/ld.lld" "--hash-style=gnu" "--eh-frame-hdr" "-m" "elf_x86_64" "-pie" "-dynamic-linker" "/lib64/ld-linux-x86-64.so.2" "-o" "bare" "/lib/../lib64/Scrt1.o" "/lib/../lib64/crti.o" "/ABSOLUTE_PATH_TO/crtbeginS.o" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/bin/../lib/x86_64-unknown-linux-gnu" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib/clang/21/lib/x86_64-unknown-linux-gnu" "-L/lib/../lib64" "-L/usr/lib64" "-L/lib" "-L/usr/lib" "target//release/host.o" "-lstdc++" "-lm" "-lomp" "-lomptarget" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib" "-lgcc_s" "-lgcc" "-lpthread" "-lc" "-lgcc_s" "-lgcc" "/ABSOLUTE_PATH_TO/crtendS.o" "/lib/../lib64/crtn.o" ``` -Especially for the last three commands I recommend to not fix the paths, but rather just re-generate them by copying a bare-mode openmp example and compiling it with your clang. By adding `-###` to your clang invocation, you can see the invidual steps. -You can ignore other steps, e.g. the invocation of a "clang-offload-packager". +You can try to find the paths to those files on your system. However, I recommend not hard-coding the paths, but rather re-generating them by copying a bare-mode OpenMP example and compiling it with your clang. By adding `-###` to your clang invocation, you can see the individual steps. +It will show multiple steps; just look for the clang-linker-wrapper invocation. Make sure to still include the path to the `host.o` file, and not whatever temporary file you got when compiling your C++ example with the following call. 
``` myclang++ -fuse-ld=lld -O3 -fopenmp -fopenmp-offload-mandatory --offload-arch=gfx90a omp_bare.cpp -o main -### ```