@@ -9,7 +9,7 @@ using Base: RefValue
99using Serialization: serialize, deserialize
1010using Clang_jll: clang
1111
12- export compile, load_function, compile_executable
12+ export compile, load_function, compile_shlib, compile_executable
1313export native_code_llvm, native_code_typed, native_llvm_module, native_code_native
1414
1515include (" target.jl" )
@@ -93,10 +93,10 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam
9393
9494 rt = only (native_code_typed (f, tt))[2 ]
9595 isconcretetype (rt) || error (" $f on $_tt did not infer to a concrete type. Got $rt " )
96-
96+
9797 f_wrap! (out:: Ref , args:: Ref{<:Tuple} ) = (out[] = f (args[]. .. ); nothing )
98- _, _, table = generate_shlib (f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs... )
99-
98+ _, _, table = generate_obj (f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs... )
99+
100100 lf = LazyStaticCompiledFunction {rt, tt} (Symbol (f), path, name, filename, table)
101101 cjl_path = joinpath (path, " $filename .cjl" )
102102 serialize (cjl_path, lf)
@@ -106,71 +106,59 @@ end
106106
107107"""
108108```julia
109- generate_shlib(f, tt, path::String, name::String, filenamebase::String="obj"; kwargs...)
109+ generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj";
110+ \t strip_llvm = false,
111+ \t strip_asm = true,
112+ \t opt_level=3,
113+ \t kwargs...)
110114```
111- Low level interface for compiling a shared object / dynamically loaded library
112- (`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing
113- the types of the arguments for which the function will be compiled.
114-
115- See also `StaticCompiler.generate_shlib_fptr`.
115+ Low level interface for compiling object code (`.o`) for function `f` given
116+ a tuple type `tt` characterizing the types of the arguments for which the
117+ function will be compiled.
116118
117119### Examples
118120```julia
119- julia> function test(n)
120- r = 0.0
121- for i=1:n
122- r += log(sqrt(i))
123- end
124- return r/n
125- end
126- test (generic function with 1 method)
121+ julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2)
122+ fib (generic function with 1 method)
127123
128- julia> path, name = StaticCompiler.generate_shlib(test , Tuple{Int64}, "./test")
129- ("./test", "test" )
124+ julia> path, name, table = StaticCompiler.generate_obj(fib , Tuple{Int64}, "./test")
125+ ("./test", "fib", IdDict{Any, String}() )
130126
131127shell> tree \$ path
132128./test
133- |-- obj.o
134- `-- obj.so
135-
136- 0 directories, 2 files
137-
138- julia> test(100_000)
139- 5.256496109495593
129+ └── obj.o
140130
141- julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 100_000)
142- 5.256496109495593
131+ 0 directories, 1 file
143132```
144133"""
145- function generate_shlib (f, tt, path:: String = tempname (), name = GPUCompiler. safe_name (repr (f)), filenamebase:: String = " obj" ;
134+ function generate_obj (f, tt, path:: String = tempname (), name = GPUCompiler. safe_name (repr (f)), filenamebase:: String = " obj" ;
146135 strip_llvm = false ,
147136 strip_asm = true ,
148137 opt_level= 3 ,
149138 kwargs... )
150139 mkpath (path)
151140 obj_path = joinpath (path, " $filenamebase .o" )
152- lib_path = joinpath (path, " $filenamebase .$(Libdl. dlext) " )
153141 tm = GPUCompiler. llvm_machine (NativeCompilerTarget ())
154142 job, kwargs = native_job (f, tt; name, kwargs... )
155143 # Get LLVM to generate a module of code for us. We don't want GPUCompiler's optimization passes.
156144 mod, meta = GPUCompiler. codegen (:llvm , job; strip= strip_llvm, only_entry= false , validate= false , optimize= false )
157-
145+
158146 # Use Enzyme's annotation and optimization pipeline
159147 annotate! (mod)
160148 optimize! (mod, tm)
161-
149+
162150 # Scoop up all the pointers in the optimized module, and replace them with uninitialized global variables.
163151 # `table` is a dictionary where the keys are julia objects that are needed by the function, and the values
164152 # of the dictionary are the names of their associated LLVM GlobalVariable names.
165153 table = relocation_table! (mod)
166-
154+
167155 # Now that we've removed all the pointers from the code, we can (hopefully) safely lower all the intrinsics
168156 # (again, using Enzyme's pipeline)
169157 post_optimize! (mod, tm)
170-
158+
171159 # Make sure we didn't make any glaring errors
172160 LLVM. verify (mod)
173-
161+
174162 # Compile the LLVM module to native code and save it to disk
175163 obj, _ = GPUCompiler. emit_asm (job, mod; strip= strip_asm, validate= false , format= LLVM. API. LLVMObjectFile)
176164 open (obj_path, " w" ) do io
@@ -237,12 +225,15 @@ shell> ./hello
237225Hello, world!
238226```
239227"""
240- function compile_executable (f, _tt= (), path:: String = " ./" , name= GPUCompiler. safe_name (repr (f)); filename= name, kwargs... )
241- tt = Base. to_tuple_type (_tt)
242- tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error (" input type signature $_tt must be either () or (Int, Ptr{Ptr{UInt8}})" )
228+ function compile_executable (f, types= (), path:: String = " ./" , name= GPUCompiler. safe_name (repr (f));
229+ filename= name,
230+ kwargs... )
231+
232+ tt = Base. to_tuple_type (types)
233+ tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error (" input type signature $types must be either () or (Int, Ptr{Ptr{UInt8}})" )
243234
244235 rt = only (native_code_typed (f, tt))[2 ]
245- isconcretetype (rt) || error (" $f$_tt did not infer to a concrete type. Got $rt " )
236+ isconcretetype (rt) || error (" $f$types did not infer to a concrete type. Got $rt " )
246237
247238 # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals
248239 # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this
@@ -253,10 +244,36 @@ function compile_executable(f, _tt=(), path::String="./", name=GPUCompiler.safe_
253244end
254245
255246
"""
```julia
compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); filename=name, kwargs...)
```
As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library.

`types` is converted with `Base.to_tuple_type` and must describe a concrete
argument signature; `f` must also infer to a concrete return type for that
signature. An error is thrown if either check fails. Any remaining `kwargs`
are forwarded to `generate_shlib`.

Returns the absolute path of the compiled library, i.e.
`joinpath(abspath(path), filename * "." * Libdl.dlext)`.
"""
function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f));
                       filename=name,
                       kwargs...)

    # Normalize the user-supplied signature and make sure it is fully concrete
    tt = Base.to_tuple_type(types)
    isconcretetype(tt) || error("input type signature $types is not concrete")

    # Refuse to compile anything whose return type cannot be concretely inferred
    rt = only(native_code_typed(f, tt))[2]
    isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt")

    # Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals
    # Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this

    generate_shlib(f, tt, path, name, filename; kwargs...)

    return joinpath(abspath(path), filename * "." * Libdl.dlext)
end
270+
271+ function generate_shlib_fptr (f, tt, path:: String = tempname (), name = GPUCompiler. safe_name (repr (f)), filename:: String = name;
272+ temp:: Bool = true ,
273+ kwargs... )
256274
257- function generate_shlib_fptr (f, tt, path:: String = tempname (), name = GPUCompiler. safe_name (repr (f)), filenamebase:: String = " obj" ; temp:: Bool = true , kwargs... )
258275 generate_shlib (f, tt, path, name; kwargs... )
259- lib_path = joinpath (abspath (path), " $filenamebase .$(Libdl. dlext) " )
276+ lib_path = joinpath (abspath (path), " $filename .$(Libdl. dlext) " )
260277 ptr = Libdl. dlopen (lib_path, Libdl. RTLD_LOCAL)
261278 fptr = Libdl. dlsym (ptr, " julia_$name " )
262279 @assert fptr != C_NULL
@@ -302,8 +319,8 @@ julia> test(100_000)
3023195.256496109495593
303320```
304321"""
305- function generate_shlib_fptr (path:: String , name, filenamebase :: String = " obj " )
306- lib_path = joinpath (abspath (path), " $filenamebase .$(Libdl. dlext) " )
322+ function generate_shlib_fptr (path:: String , name, filename :: String = name )
323+ lib_path = joinpath (abspath (path), " $filename .$(Libdl. dlext) " )
307324 ptr = Libdl. dlopen (lib_path, Libdl. RTLD_LOCAL)
308325 fptr = Libdl. dlsym (ptr, " julia_$name " )
309326 @assert fptr != C_NULL
@@ -334,38 +351,90 @@ function generate_executable(f, tt, path::String = tempname(), name = GPUCompile
334351 mkpath (path)
335352 obj_path = joinpath (path, " $filename .o" )
336353 exec_path = joinpath (path, filename)
354+ job, kwargs = native_job (f, tt; name, kwargs... )
355+ obj, _ = GPUCompiler. codegen (:obj , job; strip= true , only_entry= false , validate= false )
356+
357+ # Write to file
337358 open (obj_path, " w" ) do io
338- job, kwargs = native_job (f, tt; name, kwargs... )
339- obj, _ = GPUCompiler. codegen (:obj , job; strip= true , only_entry= false , validate= false )
359+ write (io, obj)
360+ end
361+
362+ # Pick a compiler
363+ cc = Sys. isapple () ? ` cc` : clang ()
364+ # Compile!
365+ if Sys. isapple ()
366+ # Apple no longer uses _start, so we can just specify a custom entry
367+ entry = " _julia_$name "
368+ run (` $cc -e $entry $obj_path -o $exec_path ` )
369+ else
370+ # Write a minimal wrapper to avoid having to specify a custom entry
371+ wrapper_path = joinpath (path, " wrapper.c" )
372+ f = open (wrapper_path, " w" )
373+ print (f, """ int main(int argc, char** argv)
374+ {
375+ julia_$name (argc, argv);
376+ return 0;
377+ }""" )
378+ close (f)
379+ run (` $cc $wrapper_path $obj_path -o $exec_path ` )
380+ # Clean up
381+ run (` rm $wrapper_path ` )
382+ end
383+
384+ path, name
385+ end
386+
"""
```julia
generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String = name; kwargs...)
```
Low level interface for compiling a shared object / dynamically loaded library
(`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing
the types of the arguments for which the function will be compiled.

The object file is written to `path` as `filename` with a `.o` extension and
then linked into a shared library in the same directory, named `filename` with
the platform extension `Libdl.dlext`. `filename` defaults to `name`.
Additional `kwargs` are passed on to `native_job`.

Returns the tuple `(path, name)`.

See also `StaticCompiler.generate_shlib_fptr`.

### Examples
```julia
julia> function test(n)
           r = 0.0
           for i=1:n
               r += log(sqrt(i))
           end
           return r/n
       end
test (generic function with 1 method)

julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, "./test")
("./test", "test")

shell> tree \$path
./test
|-- test.o
`-- test.so

0 directories, 2 files

julia> test(100_000)
5.256496109495593

julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 100_000)
5.256496109495593
```
"""
function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String = name; kwargs...)
    mkpath(path)
    obj_path = joinpath(path, "$filename.o")
    lib_path = joinpath(path, "$filename.$(Libdl.dlext)")

    # Only the job is needed here; the second value returned by `native_job` is unused
    job, _ = native_job(f, tt; name, kwargs...)
    obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false)

    # Write the object code to disk so the C compiler can link it
    open(obj_path, "w") do io
        write(io, obj)
    end

    # Pick a compiler: the system `cc` on macOS, otherwise the `clang` from Clang_jll
    cc = Sys.isapple() ? `cc` : clang()
    # Link the object file into a shared library
    run(`$cc -shared -o $lib_path $obj_path`)

    return path, name
end
368436
437+
369438function native_code_llvm (@nospecialize (func), @nospecialize (types); kwargs... )
370439 job, kwargs = native_job (func, types; kwargs... )
371440 GPUCompiler. code_llvm (stdout , job; kwargs... )
0 commit comments