diff --git a/streamcompiler/src/compiler/compiler.rs b/streamcompiler/src/compiler/compiler.rs index f0cbee6..ca12975 100644 --- a/streamcompiler/src/compiler/compiler.rs +++ b/streamcompiler/src/compiler/compiler.rs @@ -3,12 +3,12 @@ use core::panic; use inkwell::{builder::Builder, context::Context, execution_engine::{ExecutionEngine, JitFunction}, module::{Linkage, Module}, passes::PassBuilderOptions, targets::{CodeModel, FileType, InitializationConfig, RelocMode, Target, TargetMachine}, types::{BasicType, BasicTypeEnum}, values::{BasicMetadataValueEnum, FloatValue, FunctionValue, IntValue, VectorValue}, AddressSpace, OptimizationLevel}; use inkwell::attributes::AttributeLoc; use inkwell::llvm_sys::core::LLVMGetEnumAttributeKind; -use inkwell::types::AnyType; +use inkwell::types::{AnyType, VectorType}; use inkwell::values::AsValueRef; use crate::{compiler::expression::{ExprCompiler, ScalarExprCompiler, VectorExprCompiler}, parser::{Clause, ClauseType, Expr}}; pub type StreamCompilerProgramType = unsafe extern "C" fn(*const f64, i32) -> (); -pub type NumgrepProgramType = unsafe extern "C" fn(*const f64, *const bool, i32) -> (); +pub type NumgrepProgramType = unsafe extern "C" fn(*const f64, *const u8, i32) -> (); static mut ID: u64 = 0; @@ -48,7 +48,7 @@ impl JittedStreamCompilerProgram<'_> { } impl JittedNumgrepProgram<'_> { - pub unsafe fn call(&self, input: *const f64, filter: *const bool, len: i32) { + pub unsafe fn call(&self, input: *const f64, filter: *const u8, len: i32) { self.function.call(input, filter, len); } } @@ -119,14 +119,14 @@ impl<'ctx> CodeGen<'ctx> { pub fn compile_numgrep(&'ctx self, program: &'ctx[Clause]) -> JittedNumgrepProgram<'ctx> { - let compiled_program = self.compile_numgrep_program::<1>(program); - JittedNumgrepProgram { vector_width: Some(1), function: compiled_program } + let compiled_program = self.compile_numgrep_program::(program); + JittedNumgrepProgram { vector_width: Some(VEC_WIDTH), function: compiled_program } } fn compile_stream_compiler_program(&'ctx self, program: &'ctx[Clause]) -> JitFunction<'ctx, StreamCompilerProgramType> { - if VEC_WIDTH != 1 && VEC_WIDTH != 4 && VEC_WIDTH != 8 { - panic!("Unsupported vector width: {}. Only 1 (scalar), 4 and 8 are supported.", VEC_WIDTH); + if VEC_WIDTH != 1 && VEC_WIDTH != 8 { + panic!("Unsupported vector width: {}. Only 1 (scalar), and 8 are supported.", VEC_WIDTH); } struct CompiledClause<'a> { @@ -327,8 +327,8 @@ impl<'ctx> CodeGen<'ctx> { } fn compile_numgrep_program(&'ctx self, program: &'ctx[Clause]) -> JitFunction<'ctx, NumgrepProgramType> { - if VEC_WIDTH != 1 { - panic!("Unsupported vector width: {}. Only 1 (scalar) is supported.", VEC_WIDTH); + if VEC_WIDTH != 1 && VEC_WIDTH != 4 && VEC_WIDTH != 8 { + panic!("Unsupported vector width: {}. Only 1 (scalar), 4, or 8 is supported.", VEC_WIDTH); } struct CompiledClause<'a> { @@ -361,12 +361,18 @@ impl<'ctx> CodeGen<'ctx> { false ); - let (fn_name, function) = if VEC_WIDTH == 1 { + let (input_pointee_type, bool_type, fn_name, function) = if VEC_WIDTH == 1 { + let input_pointee_type = self.context.f64_type().as_basic_type_enum(); + let bool_type = self.context.bool_type().as_basic_type_enum(); let fn_name = format!("numgrep_program_scalar_{}", get_id()); let function = self.module.add_function(&fn_name, fn_type, Some(Linkage::External)); - (fn_name, function) + (input_pointee_type, bool_type, fn_name, function) } else { - panic!("numgrep program compilation for vector width > 1 is not implemented yet"); + let input_pointee_type = self.context.f64_type().vec_type(VEC_WIDTH).as_basic_type_enum(); + let bool_type = self.context.bool_type().vec_type(VEC_WIDTH).as_basic_type_enum(); + let fn_name = format!("numgrep_program_vec{}_{}", VEC_WIDTH, get_id()); + let function = self.module.add_function(&fn_name, fn_type, Some(Linkage::External)); + (input_pointee_type, bool_type, fn_name, function) }; let entry = self.context.append_basic_block(function, "entry"); @@ -381,7 +387,12 @@ impl<'ctx> CodeGen<'ctx> { let loop_end_bb = self.context.append_basic_block(function, "loop_end"); let exit_bb = self.context.append_basic_block(function, "exit"); - let should_include = self.builder.build_alloca(self.context.bool_type(), "should_filter").expect("Failed to allocate should_filter variable"); + let should_include = if VEC_WIDTH == 1 { + self.builder.build_alloca(self.context.bool_type(), "should_filter").expect("Failed to allocate should_filter variable") + } else { + self.builder.build_alloca(self.context.bool_type().vec_type(VEC_WIDTH), "should_filter").expect("Failed to allocate should_filter variable") + }; + self.builder.build_unconditional_branch(loop_start_bb).expect("Failed to build unconditional branch to loop"); self.builder.position_at_end(loop_start_bb); @@ -390,7 +401,7 @@ impl<'ctx> CodeGen<'ctx> { (&self.context.i32_type().const_zero(), entry), (&self.builder.build_int_add( loop_index.as_basic_value().into_int_value(), - self.context.i32_type().const_int(VEC_WIDTH as u64, false), + self.context.i32_type().const_int(1, false), "next_index" ).expect("Could not build increment"), loop_end_bb), ]); @@ -399,7 +410,11 @@ impl<'ctx> CodeGen<'ctx> { self.builder.build_int_compare( inkwell::IntPredicate::ULT, loop_index.as_basic_value().into_int_value(), - input_len, + self.builder.build_int_unsigned_div( + input_len, + self.context.i32_type().const_int(VEC_WIDTH as u64, false), + "loop_condition_div" + ).expect("Could not build loop condition division"), "loop_condition" ).expect("Could not build loop condition"), loop_body_bb, @@ -409,10 +424,10 @@ impl<'ctx> CodeGen<'ctx> { self.builder.position_at_end(loop_body_bb); let next_input = self.builder.build_load( - self.context.f64_type(), + input_pointee_type, unsafe { self.builder.build_gep( - self.context.f64_type(), + input_pointee_type, input_ptr, &[loop_index.as_basic_value().into_int_value()], "input_ptr" @@ -421,7 +436,21 @@ impl<'ctx> CodeGen<'ctx> { "next_input" ).expect("Failed to load next input"); - self.builder.build_store(should_include, self.context.bool_type().const_all_ones()).expect("Failed to store should_filter variable"); + match VEC_WIDTH { + 1 => { + self.builder.build_store(should_include, self.context.bool_type().const_all_ones()).expect("Failed to store should_include"); + }, + _ => { + // Yes I hate this too + let values = (0..VEC_WIDTH) + .map(|_| self.context.bool_type().const_all_ones()) + .collect::>(); + self.builder.build_store( + should_include, + VectorType::const_vector(&values) + ).expect("Failed to store should_include"); + }, + } for clause in compiled_clauses { let clause_entry_bb = self.context.append_basic_block(function, "clause_entry"); @@ -437,30 +466,64 @@ impl<'ctx> CodeGen<'ctx> { .expect_left("Could not get result of clause call"); // This looks wasteful, but the hope is that a) we'll have only 1 clause, and b) it will be easier to vectorize - let current_should_include= self.builder.build_load(self.context.bool_type(), should_include, "current_should_include").expect("Failed to load should_include variable"); - let new_should_include= self.builder.build_and( - current_should_include.into_int_value(), - result.into_int_value(), - "new_should_exclude" - ).expect("Failed to build AND for should_filter"); - - self.builder.build_store(should_include, new_should_include).expect("Failed to store clause result"); + let current_should_include= self.builder.build_load(bool_type, should_include, "current_should_include").expect("Failed to load should_include variable"); + + if VEC_WIDTH == 1 { + let new_should_include= self.builder.build_and( + current_should_include.into_int_value(), + result.into_int_value(), + "new_should_exclude" + ).expect("Failed to build AND for should_filter"); + + self.builder.build_store(should_include, new_should_include).expect("Failed to store clause result"); + } else { + let new_should_include= self.builder.build_and( + current_should_include.into_vector_value(), + result.into_vector_value(), + "new_should_exclude" + ).expect("Failed to build AND for should_filter"); + + self.builder.build_store(should_include, new_should_include).expect("Failed to store clause result"); + } } self.builder.build_unconditional_branch(loop_end_bb).expect("Failed to build unconditional branch to loop end"); self.builder.position_at_end(loop_end_bb); - self.builder.build_store( - unsafe { - self.builder.build_gep( - self.context.bool_type(), - filter_ptr, - &[loop_index.as_basic_value().into_int_value()], - "filter_ptr" - ).expect("Could not build GEP for filter") - }, - self.builder.build_load(self.context.bool_type(), should_include, "final_should_include").expect("Failed to load should_include variable") - ).expect("Failed to store final filter result"); + if VEC_WIDTH == 1 { + self.builder.build_store( + unsafe { + self.builder.build_gep( + bool_type, + filter_ptr, + &[loop_index.as_basic_value().into_int_value()], + "filter_ptr" + ).expect("Could not build GEP for filter") + }, + self.builder.build_load(bool_type, should_include, "final_should_include").expect("Failed to load should_include variable") + ).expect("Failed to store final filter result"); + } else { + let final_should_include = self.builder.build_load( + bool_type, + should_include, + "final_should_include" + ).expect("Failed to load should_include variable"); + + // note that we're storing an vector into the filter pointer, which means the bits are packed + // i.e. a <8 x i1> is a single byte + self.builder.build_store( + unsafe { + self.builder.build_gep( + bool_type, + filter_ptr, + &[loop_index.as_basic_value().into_int_value()], + "filter_ptr" + ).expect("Could not build GEP for filter") + }, + self.builder.build_load(bool_type, should_include, "final_should_include").expect("Failed to load should_include variable") + ).expect("Failed to store final filter result"); + } + self.builder.build_unconditional_branch(loop_start_bb).expect("Failed to build unconditional branch to loop start"); self.builder.position_at_end(exit_bb); @@ -478,7 +541,7 @@ impl<'ctx> CodeGen<'ctx> { ]; self.module.run_passes(passes.join(",").as_str(), &self.get_machine(), PassBuilderOptions::create()).expect("Failed to run passes on module"); - // self.dump_module(); + self.dump_module(); unsafe { self.execution_engine.get_function(&fn_name).ok().unwrap() } } @@ -514,8 +577,22 @@ impl<'ctx> CodeGen<'ctx> { match clause.clause_type { ClauseType::Filter => { match x.get_type() { - BasicTypeEnum::VectorType(_) => { - panic!("Vec filtering is not supported yet, please use scalar filtering instead"); + BasicTypeEnum::VectorType(t) => { + if t.get_element_type().into_float_type() != self.context.f64_type() { + panic!("Expected vector of f64 type for input parameter, got {:?}", t); + } + if t.get_size() != VEC_WIDTH as u32 { + panic!("Expected vector of f64 type with width {}, got {:?}", VEC_WIDTH, t); + } + + let condition = self.compile_expression_vec(&clause.expression, x.into_vector_value()).expect("Failed to compile expression"); + + let condition_as_bool = self.float_as_bool_vec(condition); + if filters_return_i1 { + self.builder.build_return(Some(&condition_as_bool)).expect("Failed to build return for filter clause"); + } else { + panic!("Vec filtering is only supported for i1 return type, please use scalar filtering instead"); + } }, BasicTypeEnum::FloatType(t) => { if t != self.context.f64_type() { @@ -592,7 +669,7 @@ impl<'ctx> CodeGen<'ctx> { self.expr_compiler.float_as_bool(self, value.into()).into() } - fn float_as_bool_vec<'cg>(&'cg self, value: VectorValue<'cg>) -> IntValue<'cg> { + fn float_as_bool_vec<'cg>(&'cg self, value: VectorValue<'cg>) -> VectorValue<'cg> { self.vector_expr_compiler.float_as_bool(self, value.into()).into() } } diff --git a/streamcompiler/src/compiler/expression/vector_expr_compiler.rs b/streamcompiler/src/compiler/expression/vector_expr_compiler.rs index d0c9167..2c3fe41 100644 --- a/streamcompiler/src/compiler/expression/vector_expr_compiler.rs +++ b/streamcompiler/src/compiler/expression/vector_expr_compiler.rs @@ -10,7 +10,7 @@ impl VectorExprCompiler { VectorType::const_vector(&values) } - fn const_vec<'a>(self: &'a Self, context: &'a Context, value: f64) -> VectorValue<'a> { + pub fn const_vec<'a>(self: &'a Self, context: &'a Context, value: f64) -> VectorValue<'a> { let as_f64 = context.f64_type().const_float(value); self.fill_vec(as_f64) } diff --git a/streamcompiler/src/numgrep/runner.rs b/streamcompiler/src/numgrep/runner.rs index f6e4a59..be505a0 100644 --- a/streamcompiler/src/numgrep/runner.rs +++ b/streamcompiler/src/numgrep/runner.rs @@ -15,6 +15,12 @@ impl<'a> Runner<'a> { let jitted_program = codegen.compile_numgrep(program); + match jitted_program.vector_width { + Some(8) => (), + Some(_) => panic!("Vector width other than 8 is not supported for numgrep"), + None => (), + } + Runner { jitted_program, } @@ -25,13 +31,25 @@ impl<'a> Runner<'a> { .flat_map(|(_, floats)| floats.iter().cloned()) .collect::>(); - let should_include = vec![false; input_floats.len()]; + // TODO: Handle when input_floats is not a multiple of the vector width + let should_include = vec![0u8; input_floats.len() / self.jitted_program.vector_width.unwrap_or(1) as usize]; unsafe { self.jitted_program.call(input_floats.as_ptr(), should_include.as_ptr(), input_floats.len() as i32); } + println!("should_include: {:?}", should_include); + + #[inline] + fn is_set(should_include: &[u8], index: usize, vec_width: usize) -> bool { + if vec_width != 1 { + should_include[index / vec_width] & (1u8 << (index % vec_width)) != 0 + } else { + should_include[index] != 0 + } + } + let mut output_index = 0; for i in 0..input.len() { for j in 0..input[i].1.len() { - if should_include[output_index + j] { + if is_set(&should_include, output_index + j, self.jitted_program.vector_width.unwrap_or(1) as usize) { println!("{}", input[i].0); break; }