forked from github/codeql
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathannotate-overlay-local.py
More file actions
175 lines (147 loc) · 6.75 KB
/
annotate-overlay-local.py
File metadata and controls
175 lines (147 loc) · 6.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# This script is used to annotate .qll files with overlay[local?] annotations.
# It will walk the directory tree and annotate most .qll files, skipping only
# some specific cases (e.g., empty files, files that configure dataflow for queries).
# It will also add overlay[caller] annotations to predicates that are pragma[inline]
# and either not private or in a hardcoded list of predicates.
# The script takes a list of languages and processes the corresponding directories.
# Usage: python3 annotate-overlay-local.py <language1> <language2> ...
# The script will modify the files in place and print the changes made.
# The script is designed to be run from the root of the repository.
#!/usr/bin/python3
import sys
import os
from difflib import *
# These are the only two predicates that are pragma[inline], private, and must be
# overlay[caller] in order to successfully compile our internal java queries.
hardcoded_overlay_caller_preds = [
"fwdFlowInCand", "fwdFlowInCandTypeFlowDisabled"]
def filter_out_annotations(filename):
'''
Read the file and strip all existing overlay[...] annotations from the contents.
Return the file modified file content as a list of lines.
'''
overlays = ["local?", "caller"]
annotations = [f"overlay[{t}]" for t in overlays]
with open(filename, 'r') as file_in:
lines = [l for l in file_in if not l.strip() in annotations]
for ann in annotations:
if any(line for line in lines if ann in line):
raise Exception(f"Failed to filter out {ann} from {filename}.")
return lines
def insert_toplevel_maybe_local_anntotation(filename, lines):
'''
Find a suitable place to insert an overlay[local?] annotation at the top of the file.
Return a pair: (string describing action taken, modified content as list of lines).
'''
out_lines = []
status = 0
for line in lines:
if status == 0 and line.rstrip().endswith("module;"):
out_lines.append("overlay[local?]\n")
status = 1
out_lines.append(line)
if status == 1:
return (f"Annotating \"{filename}\" via existing file-level module statement", out_lines)
out_lines = []
empty_line_buffer = []
status = 0
for line in lines:
trimmed = line.strip()
if not trimmed:
empty_line_buffer.append(line)
continue
if status <= 1 and trimmed.endswith("*/"):
status = 2
elif status == 0 and trimmed.startswith("/**"):
status = 1
elif status == 0 and not trimmed.startswith("/*"):
out_lines.append("overlay[local?]\n")
out_lines.append("module;\n")
out_lines.append("\n")
status = 3
elif status == 2 and (trimmed.startswith("import ") or trimmed.startswith("private import ")):
out_lines.append("overlay[local?]\n")
out_lines.append("module;\n")
status = 3
elif status == 2 and (trimmed.startswith("class ") or trimmed.startswith("predicate ")
or trimmed.startswith("module ") or trimmed.startswith("signature ")):
out_lines = ["overlay[local?]\n", "module;\n", "\n"] + out_lines
status = 3
elif status == 2 and trimmed.startswith("/*"):
out_lines.append("overlay[local?]\n")
out_lines.append("module;\n")
status = 3
elif status == 2:
status = 4
if empty_line_buffer:
out_lines += empty_line_buffer
empty_line_buffer = []
out_lines.append(line)
if status == 3:
out_lines += empty_line_buffer
if status == 3:
return (f"Annotating \"{filename}\" after file-level module qldoc", out_lines)
raise Exception(f"Failed to annotate \"{filename}\" as overlay[local?].")
def insert_overlay_caller_annotations(lines):
'''
Mark pragma[inline] predicates as overlay[caller] if they are not declared private
or if they are private but are in the list of hardcoded_overlay_caller_preds.
'''
out_lines = []
for i, line in enumerate(lines):
trimmed = line.strip()
if trimmed == "pragma[inline]":
if (not "private" in lines[i+1] or
any(pred in lines[i+1] for pred in hardcoded_overlay_caller_preds)):
whitespace = line[0: line.find(trimmed)]
out_lines.append(f"{whitespace}overlay[caller]\n")
out_lines.append(line)
return out_lines
def annotate_as_appropriate(filename):
'''
Read file and strip all existing overlay[...] annotations from the contents;
then insert new overlay[...] annotations according to heuristics.
Return a pair: (string describing action taken, modified content as list of lines).
'''
lines = filter_out_annotations(filename)
lines = insert_overlay_caller_annotations(lines)
# These simple heuristics filter out those .qll files that we no _not_ want to annotate
# as overlay[local?]. It is not clear that these heuristics are exactly what we want,
# but they seem to work well enough for now (as determined by speed and accuracy numbers).
if (filename.endswith("Test.qll") or
((filename.endswith("Query.qll") or filename.endswith("Config.qll")) and
any("implements DataFlow::ConfigSig" in line for line in lines))):
return (f"Keeping \"{filename}\" global because it configures dataflow for a query", lines)
elif not any(line for line in lines if line.strip()):
return (f"Keeping \"{filename}\" global because it is empty", lines)
return insert_toplevel_maybe_local_anntotation(filename, lines)
def process_single_file(filename):
'''
Process a single file, annotating it as appropriate and writing the changes back to the file.
'''
annotate_result = annotate_as_appropriate(filename)
old = [line for line in open(filename)]
new = annotate_result[1]
if old != new:
diff = context_diff(old, new, fromfile=filename, tofile=filename)
diff = [line for line in diff]
if diff:
print(annotate_result[0])
for line in diff:
print(line.rstrip())
with open(filename, "w") as out_file:
for line in new:
out_file.write(line)
dirs = []
for lang in sys.argv[1:]:
if lang in ["cpp", "go", "csharp", "java", "javascript", "python", "ruby", "rust", "swift"]:
dirs.append(f"{lang}/ql/lib")
else:
raise Exception(f"Unknown language \"{lang}\".")
if dirs:
dirs.append("shared")
for roots in dirs:
for dirpath, dirnames, filenames in os.walk(roots):
for filename in filenames:
if filename.endswith(".qll") and not dirpath.endswith("tutorial"):
process_single_file(os.path.join(dirpath, filename))