Skip to content

Commit f1db8bd

Browse files
sonar-nigel[bot]marc-jasper-sonarsource
authored andcommitted
SONARPY-3976 Rule S8520 "sum()" should not be used with an empty list to concatenate lists (#1012)
Co-authored-by: Marc Jasper <marc.jasper@sonarsource.com> GitOrigin-RevId: fd5138cf008f8139622f3a830a86663d9fe73325
1 parent cc53436 commit f1db8bd

File tree

7 files changed

+200
-0
lines changed

7 files changed

+200
-0
lines changed

python-checks/src/main/java/org/sonar/python/checks/OpenSourceCheckList.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,7 @@ public Stream<Class<?>> getChecks() {
418418
StringLiteralDuplicationCheck.class,
419419
StringReplaceCheck.class,
420420
StrongCryptographicKeysCheck.class,
421+
SumListConcatenationCheck.class,
421422
SklearnCachedPipelineDontAccessTransformersCheck.class,
422423
MissingHyperParameterCheck.class,
423424
NetworkCallsWithoutTimeoutsInLambdaCheck.class,
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) SonarSource Sàrl
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* You can redistribute and/or modify this program under the terms of
7+
* the Sonar Source-Available License Version 1, as published by SonarSource Sàrl.
8+
*
9+
* This program is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12+
* See the Sonar Source-Available License for more details.
13+
*
14+
* You should have received a copy of the Sonar Source-Available License
15+
* along with this program; if not, see https://sonarsource.com/license/ssal/
16+
*/
17+
package org.sonar.python.checks;
18+
19+
import javax.annotation.Nullable;
20+
import org.sonar.check.Rule;
21+
import org.sonar.plugins.python.api.PythonSubscriptionCheck;
22+
import org.sonar.plugins.python.api.SubscriptionContext;
23+
import org.sonar.plugins.python.api.tree.CallExpression;
24+
import org.sonar.plugins.python.api.tree.ListLiteral;
25+
import org.sonar.plugins.python.api.tree.RegularArgument;
26+
import org.sonar.plugins.python.api.tree.Tree;
27+
import org.sonar.plugins.python.api.types.v2.matchers.TypeMatcher;
28+
import org.sonar.plugins.python.api.types.v2.matchers.TypeMatchers;
29+
import org.sonar.python.tree.TreeUtils;
30+
31+
@Rule(key = "S8520")
32+
public class SumListConcatenationCheck extends PythonSubscriptionCheck {
33+
34+
private static final String MESSAGE = "Use \"itertools.chain.from_iterable()\" instead of \"sum()\" to flatten or concatenate lists.";
35+
private static final TypeMatcher SUM_MATCHER = TypeMatchers.isType("sum");
36+
37+
@Override
38+
public void initialize(Context context) {
39+
context.registerSyntaxNodeConsumer(Tree.Kind.CALL_EXPR, SumListConcatenationCheck::checkCall);
40+
}
41+
42+
private static void checkCall(SubscriptionContext ctx) {
43+
CallExpression call = (CallExpression) ctx.syntaxNode();
44+
if (!SUM_MATCHER.isTrueFor(call.callee(), ctx)) {
45+
return;
46+
}
47+
RegularArgument startArgument = TreeUtils.nthArgumentOrKeyword(1, "start", call.arguments());
48+
if (isEmptyListLiteral(startArgument)) {
49+
ctx.addIssue(call, MESSAGE);
50+
}
51+
}
52+
53+
private static boolean isEmptyListLiteral(@Nullable RegularArgument argument) {
54+
if (argument == null) {
55+
return false;
56+
}
57+
if (!argument.expression().is(Tree.Kind.LIST_LITERAL)) {
58+
return false;
59+
}
60+
ListLiteral listLiteral = (ListLiteral) argument.expression();
61+
return listLiteral.elements().expressions().isEmpty();
62+
}
63+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<p>This rule raises an issue when <code>sum()</code> is called with an empty list (<code>[]</code>) as the start parameter to concatenate or flatten
2+
lists.</p>
3+
<h2>Why is this an issue?</h2>
4+
<p>Using <code>sum(list_of_lists, [])</code> to concatenate lists is a common anti-pattern in Python. Because each <code>+</code> concatenation
5+
creates a new list and copies all previous elements into it, the overall operation has quadratic time complexity O(n<sup>2</sup>) instead of the
6+
linear O(n) that proper alternatives achieve.</p>
7+
<p>Additionally, <code>sum()</code> is designed for numeric addition, not list operations. Using it for lists is semantically unclear and may confuse
8+
other developers reading the code.</p>
9+
<h3>What is the potential impact?</h3>
10+
<ul>
11+
<li><strong>performance degradation</strong>: operations that should take milliseconds can take seconds or even minutes with large datasets</li>
12+
<li><strong>memory pressure</strong>: each intermediate concatenation allocates a new list, creating unnecessary memory overhead</li>
13+
<li><strong>scalability issues</strong>: code that works fine in development with small datasets may become unusable in production</li>
14+
</ul>
15+
<h2>How to fix it</h2>
16+
<h3>Code examples</h3>
17+
<h4>Noncompliant code example</h4>
18+
<pre data-diff-id="1" data-diff-type="noncompliant">
19+
list_of_lists = [[1, 2], [3, 4], [5, 6]]
20+
result = sum(list_of_lists, []) # Noncompliant
21+
</pre>
22+
<h4>Compliant solution</h4>
23+
<pre data-diff-id="1" data-diff-type="compliant">
24+
import itertools
25+
26+
list_of_lists = [[1, 2], [3, 4], [5, 6]]
27+
result = list(itertools.chain.from_iterable(list_of_lists))
28+
</pre>
29+
<h3>How does this work?</h3>
30+
<p><code>itertools.chain.from_iterable()</code> creates an iterator that yields elements from each sub-list in sequence without creating intermediate
31+
list copies. This avoids the repeated copying that causes the quadratic behavior of <code>sum()</code> and results in linear O(n) time complexity.</p>
32+
<h2>Resources</h2>
33+
<h3>Documentation</h3>
34+
<ul>
35+
<li>Python Documentation - <a href="https://docs.python.org/3/library/itertools.html#itertools.chain"><code>itertools.chain</code> and
36+
<code>chain.from_iterable</code></a></li>
37+
</ul>
38+
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"title": "\"sum()\" should not be used with an empty list to concatenate lists",
3+
"type": "CODE_SMELL",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "5min"
8+
},
9+
"tags": [
10+
"performance",
11+
"pitfall"
12+
],
13+
"defaultSeverity": "Major",
14+
"ruleSpecification": "RSPEC-8520",
15+
"sqKey": "S8520",
16+
"scope": "All",
17+
"quickfix": "unknown",
18+
"code": {
19+
"impacts": {
20+
"RELIABILITY": "HIGH"
21+
},
22+
"attribute": "EFFICIENT"
23+
}
24+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@
332332
"S8495",
333333
"S8504",
334334
"S8517",
335+
"S8520",
335336
"S8521"
336337
]
337338
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) SonarSource Sàrl
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* You can redistribute and/or modify this program under the terms of
7+
* the Sonar Source-Available License Version 1, as published by SonarSource Sàrl.
8+
*
9+
* This program is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12+
* See the Sonar Source-Available License for more details.
13+
*
14+
* You should have received a copy of the Sonar Source-Available License
15+
* along with this program; if not, see https://sonarsource.com/license/ssal/
16+
*/
17+
package org.sonar.python.checks;
18+
19+
import org.junit.jupiter.api.Test;
20+
import org.sonar.python.checks.utils.PythonCheckVerifier;
21+
22+
class SumListConcatenationCheckTest {
23+
24+
@Test
25+
void test() {
26+
PythonCheckVerifier.verify("src/test/resources/checks/sumListConcatenation.py", new SumListConcatenationCheck());
27+
}
28+
29+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
list_of_lists = [[1, 2], [3, 4]]
2+
3+
4+
def noncompliant_empty_list_as_start():
5+
sum(list_of_lists, []) # Noncompliant {{Use "itertools.chain.from_iterable()" instead of "sum()" to flatten or concatenate lists.}}
6+
# ^^^^^^^^^^^^^^^^^^^^^^
7+
8+
9+
def noncompliant_start_keyword_argument():
10+
sum(list_of_lists, start=[]) # Noncompliant {{Use "itertools.chain.from_iterable()" instead of "sum()" to flatten or concatenate lists.}}
11+
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12+
13+
14+
def compliant_callee_is_not_builtin_sum(other_sum):
15+
def sum(iterable, start):
16+
result = start
17+
for item in iterable:
18+
result = result + item
19+
return result
20+
21+
sum([[1, 2]], [])
22+
other_sum([[1, 2]], [])
23+
24+
25+
def compliant_no_start_argument():
26+
sum([1, 2, 3])
27+
sum()
28+
29+
30+
def compliant_start_not_empty_list_literal():
31+
sum(list_of_lists, [0])
32+
33+
34+
def compliant_start_not_list_literal():
35+
sum(list_of_lists, 10)
36+
37+
38+
def fn_start_not_syntactic_empty_list(): # FN: only literal []; Name, list(), **kwargs, starred unpack
39+
empty = []
40+
sum(list_of_lists, empty)
41+
sum(list_of_lists, list())
42+
kwargs = {"start": []}
43+
sum(list_of_lists, **kwargs)
44+
sum(list_of_lists, *[[]])

0 commit comments

Comments
 (0)