Skip to content

Commit 57e0425

Browse files
Add compile_dylib (#64)
* Clean up `generate_executable` * Delete unused line * Rename generate_shlib to generate_obj * Add generate_dylib * Test generate_dylib * Add and test `compile_dylib` * s/_dylib/_shlib/g to keep original terminology
1 parent 998feec commit 57e0425

2 files changed

Lines changed: 156 additions & 83 deletions

File tree

src/StaticCompiler.jl

Lines changed: 138 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ using Base: RefValue
99
using Serialization: serialize, deserialize
1010
using Clang_jll: clang
1111

12-
export compile, load_function, compile_executable
12+
export compile, load_function, compile_shlib, compile_executable
1313
export native_code_llvm, native_code_typed, native_llvm_module, native_code_native
1414

1515
include("target.jl")
@@ -93,10 +93,10 @@ function compile(f, _tt, path::String = tempname(); name = GPUCompiler.safe_nam
9393

9494
rt = only(native_code_typed(f, tt))[2]
9595
isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt")
96-
96+
9797
f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing)
98-
_, _, table = generate_shlib(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs...)
99-
98+
_, _, table = generate_obj(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, path, name; opt_level, strip_llvm, strip_asm, filename, kwargs...)
99+
100100
lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name, filename, table)
101101
cjl_path = joinpath(path, "$filename.cjl")
102102
serialize(cjl_path, lf)
@@ -106,71 +106,59 @@ end
106106

107107
"""
108108
```julia
109-
generate_shlib(f, tt, path::String, name::String, filenamebase::String="obj"; kwargs...)
109+
generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj";
110+
\tstrip_llvm = false,
111+
\tstrip_asm = true,
112+
\topt_level=3,
113+
\tkwargs...)
110114
```
111-
Low level interface for compiling a shared object / dynamically loaded library
112-
(`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing
113-
the types of the arguments for which the function will be compiled.
114-
115-
See also `StaticCompiler.generate_shlib_fptr`.
115+
Low level interface for compiling object code (`.o`) for function `f` given
116+
a tuple type `tt` characterizing the types of the arguments for which the
117+
function will be compiled.
116118
117119
### Examples
118120
```julia
119-
julia> function test(n)
120-
r = 0.0
121-
for i=1:n
122-
r += log(sqrt(i))
123-
end
124-
return r/n
125-
end
126-
test (generic function with 1 method)
121+
julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2)
122+
fib (generic function with 1 method)
127123
128-
julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, "./test")
129-
("./test", "test")
124+
julia> path, name, table = StaticCompiler.generate_obj(fib, Tuple{Int64}, "./test")
125+
("./test", "fib", IdDict{Any, String}())
130126
131127
shell> tree \$path
132128
./test
133-
|-- obj.o
134-
`-- obj.so
135-
136-
0 directories, 2 files
137-
138-
julia> test(100_000)
139-
5.256496109495593
129+
└── obj.o
140130
141-
julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 100_000)
142-
5.256496109495593
131+
0 directories, 1 file
143132
```
144133
"""
145-
function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj";
134+
function generate_obj(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj";
146135
strip_llvm = false,
147136
strip_asm = true,
148137
opt_level=3,
149138
kwargs...)
150139
mkpath(path)
151140
obj_path = joinpath(path, "$filenamebase.o")
152-
lib_path = joinpath(path, "$filenamebase.$(Libdl.dlext)")
153141
tm = GPUCompiler.llvm_machine(NativeCompilerTarget())
154142
job, kwargs = native_job(f, tt; name, kwargs...)
155143
# Get LLVM to generate a module of code for us. We don't want GPUCompiler's optimization passes.
156144
mod, meta = GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false)
157-
145+
158146
# Use Enzyme's annotation and optimization pipeline
159147
annotate!(mod)
160148
optimize!(mod, tm)
161-
149+
162150
# Scoop up all the pointers in the optimized module, and replace them with uninitialized global variables.
163151
# `table` is a dictionary where the keys are julia objects that are needed by the function, and the values
164152
# of the dictionary are the names of their associated LLVM GlobalVariable names.
165153
table = relocation_table!(mod)
166-
154+
167155
# Now that we've removed all the pointers from the code, we can (hopefully) safely lower all the intrinsics
168156
# (again, using Enzyme's pipeline)
169157
post_optimize!(mod, tm)
170-
158+
171159
# Make sure we didn't make any glaring errors
172160
LLVM.verify(mod)
173-
161+
174162
# Compile the LLVM module to native code and save it to disk
175163
obj, _ = GPUCompiler.emit_asm(job, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile)
176164
open(obj_path, "w") do io
@@ -237,12 +225,15 @@ shell> ./hello
237225
Hello, world!
238226
```
239227
"""
240-
function compile_executable(f, _tt=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); filename=name, kwargs...)
241-
tt = Base.to_tuple_type(_tt)
242-
tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $_tt must be either () or (Int, Ptr{Ptr{UInt8}})")
228+
function compile_executable(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f));
229+
filename=name,
230+
kwargs...)
231+
232+
tt = Base.to_tuple_type(types)
233+
tt == Tuple{} || tt == Tuple{Int, Ptr{Ptr{UInt8}}} || error("input type signature $types must be either () or (Int, Ptr{Ptr{UInt8}})")
243234

244235
rt = only(native_code_typed(f, tt))[2]
245-
isconcretetype(rt) || error("$f$_tt did not infer to a concrete type. Got $rt")
236+
isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt")
246237

247238
# Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals
248239
# Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this
@@ -253,10 +244,36 @@ function compile_executable(f, _tt=(), path::String="./", name=GPUCompiler.safe_
253244
end
254245

255246

247+
"""
248+
```julia
249+
compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f)); filename=name, kwargs...)
250+
```
251+
Like `compile_executable`, but compiles `f` to a standalone shared library (`.dylib`/`.so`) and returns the absolute path to the generated library file.
252+
"""
253+
function compile_shlib(f, types=(), path::String="./", name=GPUCompiler.safe_name(repr(f));
254+
filename=name,
255+
kwargs...)
256+
257+
tt = Base.to_tuple_type(types)
258+
isconcretetype(tt) || error("input type signature $types is not concrete")
259+
260+
rt = only(native_code_typed(f, tt))[2]
261+
isconcretetype(rt) || error("$f$types did not infer to a concrete type. Got $rt")
262+
263+
# Would be nice to use a compiler pass or something to check if there are any heap allocations or references to globals
264+
# Keep an eye on https://github.com/JuliaLang/julia/pull/43747 for this
265+
266+
generate_shlib(f, tt, path, name, filename; kwargs...)
267+
268+
joinpath(abspath(path), filename * "." * Libdl.dlext)
269+
end
270+
271+
function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=name;
272+
temp::Bool=true,
273+
kwargs...)
256274

257-
function generate_shlib_fptr(f, tt, path::String=tempname(), name = GPUCompiler.safe_name(repr(f)), filenamebase::String="obj"; temp::Bool=true, kwargs...)
258275
generate_shlib(f, tt, path, name; kwargs...)
259-
lib_path = joinpath(abspath(path), "$filenamebase.$(Libdl.dlext)")
276+
lib_path = joinpath(abspath(path), "$filename.$(Libdl.dlext)")
260277
ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL)
261278
fptr = Libdl.dlsym(ptr, "julia_$name")
262279
@assert fptr != C_NULL
@@ -302,8 +319,8 @@ julia> test(100_000)
302319
5.256496109495593
303320
```
304321
"""
305-
function generate_shlib_fptr(path::String, name, filenamebase::String="obj")
306-
lib_path = joinpath(abspath(path), "$filenamebase.$(Libdl.dlext)")
322+
function generate_shlib_fptr(path::String, name, filename::String=name)
323+
lib_path = joinpath(abspath(path), "$filename.$(Libdl.dlext)")
307324
ptr = Libdl.dlopen(lib_path, Libdl.RTLD_LOCAL)
308325
fptr = Libdl.dlsym(ptr, "julia_$name")
309326
@assert fptr != C_NULL
@@ -334,38 +351,90 @@ function generate_executable(f, tt, path::String = tempname(), name = GPUCompile
334351
mkpath(path)
335352
obj_path = joinpath(path, "$filename.o")
336353
exec_path = joinpath(path, filename)
354+
job, kwargs = native_job(f, tt; name, kwargs...)
355+
obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false)
356+
357+
# Write to file
337358
open(obj_path, "w") do io
338-
job, kwargs = native_job(f, tt; name, kwargs...)
339-
obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false)
359+
write(io, obj)
360+
end
361+
362+
# Pick a compiler
363+
cc = Sys.isapple() ? `cc` : clang()
364+
# Compile!
365+
if Sys.isapple()
366+
# Apple no longer uses _start, so we can just specify a custom entry
367+
entry = "_julia_$name"
368+
run(`$cc -e $entry $obj_path -o $exec_path`)
369+
else
370+
# Write a minimal wrapper to avoid having to specify a custom entry
371+
wrapper_path = joinpath(path, "wrapper.c")
372+
f = open(wrapper_path, "w")
373+
print(f, """int main(int argc, char** argv)
374+
{
375+
julia_$name(argc, argv);
376+
return 0;
377+
}""")
378+
close(f)
379+
run(`$cc $wrapper_path $obj_path -o $exec_path`)
380+
# Clean up
381+
run(`rm $wrapper_path`)
382+
end
383+
384+
path, name
385+
end
386+
387+
"""
388+
```julia
389+
generate_shlib(f, tt, path::String=tempname(), name=GPUCompiler.safe_name(repr(f)), filename::String=name; kwargs...)
390+
```
391+
Low level interface for compiling a shared object / dynamically loaded library
392+
(`.so` / `.dylib`) for function `f` given a tuple type `tt` characterizing
393+
the types of the arguments for which the function will be compiled.
394+
See also `StaticCompiler.generate_shlib_fptr`.
395+
### Examples
396+
```julia
397+
julia> function test(n)
398+
r = 0.0
399+
for i=1:n
400+
r += log(sqrt(i))
401+
end
402+
return r/n
403+
end
404+
test (generic function with 1 method)
405+
julia> path, name = StaticCompiler.generate_shlib(test, Tuple{Int64}, "./test")
406+
("./test", "test")
407+
shell> tree \$path
408+
./test
409+
|-- test.o
410+
`-- test.so
411+
0 directories, 2 files
412+
julia> test(100_000)
413+
5.256496109495593
414+
julia> ccall(StaticCompiler.generate_shlib_fptr(path, name), Float64, (Int64,), 100_000)
415+
5.256496109495593
416+
```
417+
"""
418+
function generate_shlib(f, tt, path::String = tempname(), name = GPUCompiler.safe_name(repr(f)), filename::String=name; kwargs...)
419+
mkpath(path)
420+
obj_path = joinpath(path, "$filename.o")
421+
lib_path = joinpath(path, "$filename.$(Libdl.dlext)")
422+
job, kwargs = native_job(f, tt; name, kwargs...)
423+
obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false)
340424

425+
open(obj_path, "w") do io
341426
write(io, obj)
342-
flush(io)
343-
344-
# Pick a compiler
345-
cc = Sys.isapple() ? `cc` : clang()
346-
# Compile!
347-
if Sys.isapple()
348-
# Apple no longer uses _start, so we can just specify a custom entry
349-
entry = "_julia_$name"
350-
run(`$cc -e $entry $obj_path -o $exec_path`)
351-
else
352-
# Write a minimal wrapper to avoid having to specify a custom entry
353-
wrapper_path = joinpath(path, "wrapper.c")
354-
f = open(wrapper_path, "w")
355-
print(f, """int main(int argc, char** argv)
356-
{
357-
julia_$name(argc, argv);
358-
return 0;
359-
}""")
360-
close(f)
361-
run(`$cc $wrapper_path $obj_path -o $exec_path`)
362-
# Clean up
363-
run(`rm $wrapper_path`)
364-
end
365427
end
428+
429+
# Pick a compiler
430+
cc = Sys.isapple() ? `cc` : clang()
431+
# Compile!
432+
run(`$cc -shared -o $lib_path $obj_path`)
433+
366434
path, name
367435
end
368436

437+
369438
function native_code_llvm(@nospecialize(func), @nospecialize(types); kwargs...)
370439
job, kwargs = native_job(func, types; kwargs...)
371440
GPUCompiler.code_llvm(stdout, job; kwargs...)

test/runtests.jl

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,6 @@ fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globall
3838
#@test compile(fib2, (Int,))[1](20) == fib(20)
3939
end
4040

41-
# Call binaries for testing
42-
# @testset "Generate binary" begin
43-
# fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2)
44-
# libname = tempname()
45-
# generate_shlib(fib, (Int,), libname)
46-
# ptr = Libdl.dlopen(libname * "." * Libdl.dlext, Libdl.RTLD_LOCAL)
47-
# fptr = Libdl.dlsym(ptr, "julia_fib")
48-
# @assert fptr != C_NULL
49-
# # This works on REPL
50-
# @test_skip ccall(fptr, Int, (Int,), 10) == 55
51-
# end
52-
5341

5442
@testset "Loops" begin
5543
function sum_first_N_int(N)
@@ -154,7 +142,7 @@ end
154142
e
155143
end
156144
end
157-
@test fetch(tsk) isa DomainError
145+
@test fetch(tsk) isa DomainError
158146
end
159147

160148
# Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer.
@@ -175,7 +163,7 @@ end
175163
BLAS.dot(N, a, 1, a, 1)
176164
end
177165
a = [1.0, 2.0]
178-
166+
179167
mydot_compiled, path = compile(mydot, (Vector{Float64},))
180168
# Works locally for me, but not on CI. Need some improvements to pointer relocation to be robust.
181169
@test_skip remote_load_call(path, a) == 5.0
@@ -250,6 +238,22 @@ end
250238
@test C ≈ A*B
251239
end
252240

241+
@testset "Standalone Dylibs" begin
242+
# Test function
243+
# (already defined)
244+
# fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2)
245+
246+
#Compile dylib
247+
name = repr(fib)
248+
filepath = compile_shlib(fib, (Int,), "./", name)
249+
@test occursin("fib.$(Libdl.dlext)", filepath)
250+
251+
# Open dylib
252+
ptr = Libdl.dlopen(filepath, Libdl.RTLD_LOCAL)
253+
fptr = Libdl.dlsym(ptr, "julia_$name")
254+
@test fptr != C_NULL
255+
@test ccall(fptr, Int, (Int,), 10) == 55
256+
end
253257

254258

255259
@testset "Standalone Executables" begin

0 commit comments

Comments
 (0)