Skip to content

Commit 8fa6ef3

Browse files
committed
Remove scientific notation parsing.
This behaviour comes from the initial pdf.js commit, but it is wrong and doesn't match other PDF readers such as MuPDF or PDFium. From the PDF specification, section 7.3.3: "A PDF writer shall not use the PostScript language syntax for numbers with non-decimal radices (such as 16#FFFE) or in exponential format (such as 6.02E23)."
1 parent 3c43414 commit 8fa6ef3

5 files changed

Lines changed: 13 additions & 30 deletions

File tree

src/core/parser.js

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -908,7 +908,6 @@ class Lexer {
908908

909909
getNumber() {
910910
let ch = this.currentChar;
911-
let eNotation = false;
912911
let divideBy = 0; // Different from 0 if it's a floating point value.
913912
let sign = 1;
914913

@@ -951,22 +950,15 @@ class Lexer {
951950
}
952951

953952
let baseValue = ch - 0x30; // '0'
954-
let powerValue = 0;
955-
let powerValueSign = 1;
956953

957954
while ((ch = this.nextChar()) >= 0) {
958955
if (ch >= /* '0' = */ 0x30 && ch <= /* '9' = */ 0x39) {
959956
const currentDigit = ch - 0x30; // '0'
960-
if (eNotation) {
961-
// We are after an 'e' or 'E'.
962-
powerValue = powerValue * 10 + currentDigit;
963-
} else {
964-
if (divideBy !== 0) {
965-
// We are after a point.
966-
divideBy *= 10;
967-
}
968-
baseValue = baseValue * 10 + currentDigit;
957+
if (divideBy !== 0) {
958+
// We are after a point.
959+
divideBy *= 10;
969960
}
961+
baseValue = baseValue * 10 + currentDigit;
970962
} else if (ch === /* '.' = */ 0x2e) {
971963
if (divideBy === 0) {
972964
divideBy = 1;
@@ -978,18 +970,6 @@ class Lexer {
978970
// Ignore minus signs in the middle of numbers to match
979971
// Adobe's behavior.
980972
warn("Badly formatted number: minus sign in the middle");
981-
} else if (ch === /* 'E' = */ 0x45 || ch === /* 'e' = */ 0x65) {
982-
// 'E' can be either a scientific notation or the beginning of a new
983-
// operator.
984-
ch = this.peekChar();
985-
if (ch === /* '+' = */ 0x2b || ch === /* '-' = */ 0x2d) {
986-
powerValueSign = ch === 0x2d ? -1 : 1;
987-
this.nextChar(); // Consume the sign character.
988-
} else if (ch < /* '0' = */ 0x30 || ch > /* '9' = */ 0x39) {
989-
// The 'E' must be the beginning of a new operator.
990-
break;
991-
}
992-
eNotation = true;
993973
} else {
994974
// The last character doesn't belong to us.
995975
break;
@@ -999,9 +979,6 @@ class Lexer {
999979
if (divideBy !== 0) {
1000980
baseValue /= divideBy;
1001981
}
1002-
if (eNotation) {
1003-
baseValue *= 10 ** (powerValueSign * powerValue);
1004-
}
1005982
return sign * baseValue;
1006983
}
1007984

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,3 +874,4 @@
874874
!bug2013793.pdf
875875
!bug2014080.pdf
876876
!two_pages.pdf
877+
!sci-notation.pdf

test/pdfs/sci-notation.pdf

657 Bytes
Binary file not shown.

test/test_manifest.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13964,5 +13964,12 @@
1396413964
"firstPage": 171,
1396513965
"lastPage": 171,
1396613966
"type": "eq"
13967+
},
13968+
{
13969+
"id": "sci-notation",
13970+
"file": "pdfs/sci-notation.pdf",
13971+
"md5": "ead167e0328f1a1f4f8901cee501a9c4",
13972+
"rounds": 1,
13973+
"type": "eq"
1396713974
}
1396813975
]

test/unit/parser_spec.js

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,13 +94,11 @@ describe("parser", function () {
9494
expect(lexer.getNumber()).toEqual(11.234);
9595
});
9696

97-
it("should parse PostScript numbers", function () {
97+
it("should parse PDF numbers", function () {
9898
const numbers = [
9999
"-.002",
100100
"34.5",
101101
"-3.62",
102-
"123.6e10",
103-
"1E-5",
104102
"-1.",
105103
"0.0",
106104
"123",

0 commit comments

Comments
 (0)