Skip to content

Commit 4b9995c

Browse files
Fix DecimalConverter to support scientific notation (#36)
* Fix DecimalConverter to support scientific notation
  - Changed NumberStyles from Number to Float | AllowThousands
  - This enables parsing of scientific notation values (e.g., 1.2345678E5)
  - Aligns DecimalConverter with SingleConverter and DoubleConverter behavior
  - Added comprehensive tests for scientific notation parsing

  Fixes #35

  Co-authored-by: Chrissy LeMaire <potatoqualitee@users.noreply.github.com>

* Add MoneyConverter and VectorConverter support
  - Added MoneyConverter with NumberStyles.Currency for SQL Server money/smallmoney types
    - Supports currency symbols ($, €, £, etc.)
    - Supports thousands separators (1,234.56)
    - Supports negative parentheses accounting format ((0.00))
    - Supports scientific notation
  - Added VectorConverter for SQL Server 2025 VECTOR data type
    - Supports JSON array format: [0.1, 0.2, 0.3]
    - Supports comma-separated format: 0.1, 0.2, 0.3
    - Supports scientific notation in vector elements
    - Handles large embeddings (tested with 100+ dimensions)
  - Added comprehensive test coverage for both converters
    - MoneyConverter: currency symbols, thousands separators, scientific notation
    - VectorConverter: JSON arrays, comma-separated, scientific notation, edge cases

  Co-authored-by: Chrissy LeMaire <potatoqualitee@users.noreply.github.com>

* Register VectorConverter and document MoneyConverter usage
  - Register VectorConverter in TypeConverterRegistry for float[] type
  - Add comprehensive XML documentation to MoneyConverter explaining manual registration requirement
  - Remove unused System.Linq import from VectorConverter
  - MoneyConverter is not auto-registered due to type conflict with DecimalConverter

  Co-authored-by: Chrissy LeMaire <potatoqualitee@users.noreply.github.com>

* Address code review feedback
  - Fix VectorConverter null safety for empty arrays
  - Optimize VectorConverter.Split with cached separator
  - Update MoneyConverter docs with registry cloning example
  - Document AllowThousands breaking change in DecimalConverter
  - Add comprehensive tests for thousands separators
  - Add edge case tests (overflow, invalid notation)
  - Add culture-aware decimal parsing tests
  - Add VectorConverter performance benchmarks

  Co-authored-by: Chrissy LeMaire <potatoqualitee@users.noreply.github.com>

---------

Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
1 parent af724b2 commit 4b9995c

File tree

5 files changed

+452
-2
lines changed

5 files changed

+452
-2
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
using BenchmarkDotNet.Attributes;
2+
using Dataplat.Dbatools.Csv.TypeConverters;
3+
using System.Linq;
4+
5+
namespace CsvBenchmarks;
6+
7+
/// <summary>
/// Benchmarks for TypeConverter implementations, focusing on performance-critical scenarios.
/// </summary>
/// <remarks>
/// Each benchmark feeds one textual vector to <see cref="VectorConverter"/> and returns the
/// parsed array. The generated inputs are formatted with the invariant culture so that the
/// benchmark text is identical on every machine, regardless of the current culture.
/// </remarks>
[MemoryDiagnoser]
[RankColumn]
public class TypeConverterBenchmarks
{
    private string _smallVector;
    private string _largeVector;
    private string _openAiVector;
    private VectorConverter _converter;

    /// <summary>
    /// Builds the benchmark inputs once, before any benchmark method runs.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _converter = VectorConverter.Default;

        // Small vector: 3 dimensions
        _smallVector = "[0.1, 0.2, 0.3]";

        // Large vector: 100 dimensions
        _largeVector = BuildVector(100, 0.01f, "F3");

        // OpenAI ada-002 embedding size: 1536 dimensions
        _openAiVector = BuildVector(1536, 0.001f, "F4");
    }

    /// <summary>Parses the 3-element vector; this is the baseline for the ranking.</summary>
    /// <returns>The array produced by the converter.</returns>
    [Benchmark(Baseline = true, Description = "Small Vector (3 dims)")]
    [BenchmarkCategory("VectorConverter")]
    public float[] VectorConverter_Small()
    {
        _converter.TryConvert(_smallVector, out float[] result);
        return result;
    }

    /// <summary>Parses the generated 100-element vector.</summary>
    /// <returns>The array produced by the converter.</returns>
    [Benchmark(Description = "Large Vector (100 dims)")]
    [BenchmarkCategory("VectorConverter")]
    public float[] VectorConverter_Large()
    {
        _converter.TryConvert(_largeVector, out float[] result);
        return result;
    }

    /// <summary>Parses the generated 1536-element vector.</summary>
    /// <returns>The array produced by the converter.</returns>
    [Benchmark(Description = "OpenAI Vector (1536 dims)")]
    [BenchmarkCategory("VectorConverter")]
    public float[] VectorConverter_OpenAI()
    {
        _converter.TryConvert(_openAiVector, out float[] result);
        return result;
    }

    /// <summary>Parses a fixed 3-element vector written in scientific notation.</summary>
    /// <returns>The array produced by the converter.</returns>
    [Benchmark(Description = "Scientific Notation")]
    [BenchmarkCategory("VectorConverter")]
    public float[] VectorConverter_Scientific()
    {
        _converter.TryConvert("[1.5e-3, 2.0E2, -3.5e1]", out float[] result);
        return result;
    }

    /// <summary>
    /// Builds a bracketed, comma-separated vector literal whose i-th element is <c>i * step</c>.
    /// </summary>
    /// <param name="dimensions">Number of elements to generate.</param>
    /// <param name="step">Multiplier applied to the element index.</param>
    /// <param name="format">Numeric format string applied to every element (e.g. "F3").</param>
    /// <returns>The vector text, e.g. <c>[0.000, 0.010, ...]</c>.</returns>
    private static string BuildVector(int dimensions, float step, string format)
    {
        // A culture-sensitive ToString would emit ',' as the decimal separator under cultures
        // such as de-DE, which collides with the ", " element separator used here.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        var elements = Enumerable.Range(0, dimensions)
            .Select(i => (i * step).ToString(format, invariant));

        return "[" + string.Join(", ", elements) + "]";
    }
}

project/dbatools.Tests/Csv/TypeConverterTest.cs

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Linq;
23
using Microsoft.VisualStudio.TestTools.UnitTesting;
34
using Dataplat.Dbatools.Csv.TypeConverters;
45

@@ -161,6 +162,269 @@ public void TestDecimalConverter()
161162
Assert.AreEqual(1234567890.123456m, result);
162163
}
163164

165+
/// <summary>
/// Checks that the default DecimalConverter accepts exponent notation with upper- and
/// lower-case markers, negative exponents and a negative mantissa.
/// </summary>
[TestMethod]
public void TestDecimalConverterScientificNotation()
{
    var decimalConverter = DecimalConverter.Default;
    decimal parsed;

    // Input reported in issue #35.
    var accepted = decimalConverter.TryConvert("1.2345678E5", out parsed);
    Assert.IsTrue(accepted);
    Assert.AreEqual(123456.78m, parsed);

    // Lower-case exponent marker with a large positive exponent.
    accepted = decimalConverter.TryConvert("1.5e10", out parsed);
    Assert.IsTrue(accepted);
    Assert.AreEqual(15000000000m, parsed);

    // Negative exponent.
    accepted = decimalConverter.TryConvert("2.5E-3", out parsed);
    Assert.IsTrue(accepted);
    Assert.AreEqual(0.0025m, parsed);

    // Negative mantissa.
    accepted = decimalConverter.TryConvert("-3.14E2", out parsed);
    Assert.IsTrue(accepted);
    Assert.AreEqual(-314m, parsed);
}
184+
185+
/// <summary>
/// Checks that the default DecimalConverter accepts comma-grouped input with a '.' decimal point.
/// </summary>
[TestMethod]
public void TestDecimalConverterThousandsSeparator()
{
    var decimalConverter = DecimalConverter.Default;
    decimal parsed;

    // One group separator.
    Assert.IsTrue(decimalConverter.TryConvert("1,234.56", out parsed));
    Assert.AreEqual(1234.56m, parsed);

    // Two group separators.
    Assert.IsTrue(decimalConverter.TryConvert("1,234,567.89", out parsed));
    Assert.AreEqual(1234567.89m, parsed);

    // Leading minus sign combined with a group separator.
    Assert.IsTrue(decimalConverter.TryConvert("-1,234.56", out parsed));
    Assert.AreEqual(-1234.56m, parsed);
}
202+
203+
/// <summary>
/// Checks boundary inputs for the default DecimalConverter: zero, the smallest and a very large
/// representable magnitude, overflow, and malformed exponent notation.
/// </summary>
[TestMethod]
public void TestDecimalConverterEdgeCases()
{
    var decimalConverter = DecimalConverter.Default;
    decimal parsed;

    // Plain zero.
    Assert.IsTrue(decimalConverter.TryConvert("0", out parsed));
    Assert.AreEqual(0m, parsed);

    // Zero written with an exponent.
    Assert.IsTrue(decimalConverter.TryConvert("0.0E0", out parsed));
    Assert.AreEqual(0m, parsed);

    // Very small magnitude.
    Assert.IsTrue(decimalConverter.TryConvert("1E-28", out parsed));
    Assert.AreEqual(0.0000000000000000000000000001m, parsed);

    // Close to the upper bound (decimal.MaxValue is ~7.9E+28).
    Assert.IsTrue(decimalConverter.TryConvert("1E+28", out parsed));
    Assert.AreEqual(10000000000000000000000000000m, parsed);

    // Inputs that must be rejected, in order: overflow, exponent marker without digits,
    // exponent without a mantissa.
    string[] rejectedInputs = { "1E+30", "1E", "E5" };
    foreach (string candidate in rejectedInputs)
    {
        Assert.IsFalse(decimalConverter.TryConvert(candidate, out _));
    }
}
231+
232+
/// <summary>
/// Checks that a DecimalConverter honours the culture assigned to its FormatProvider,
/// using de-DE and fr-FR formatted input.
/// </summary>
[TestMethod]
public void TestDecimalConverterDifferentCultures()
{
    decimal parsed;

    // German input: comma is the decimal separator.
    var deConverter = new DecimalConverter
    {
        FormatProvider = System.Globalization.CultureInfo.GetCultureInfo("de-DE")
    };

    Assert.IsTrue(deConverter.TryConvert("1234,56", out parsed));
    Assert.AreEqual(1234.56m, parsed);

    // French input: a space groups the thousands and comma is the decimal separator.
    var frConverter = new DecimalConverter
    {
        FormatProvider = System.Globalization.CultureInfo.GetCultureInfo("fr-FR")
    };

    Assert.IsTrue(frConverter.TryConvert("1 234,56", out parsed));
    Assert.AreEqual(1234.56m, parsed);
}
249+
250+
/// <summary>
/// Checks that the default MoneyConverter parses plain positive and negative decimal text.
/// </summary>
[TestMethod]
public void TestMoneyConverter()
{
    var moneyConverter = MoneyConverter.Default;
    decimal amount;

    // Positive value without any currency decoration.
    var accepted = moneyConverter.TryConvert("123.45", out amount);
    Assert.IsTrue(accepted);
    Assert.AreEqual(123.45m, amount);

    // Negative value with a leading minus sign.
    accepted = moneyConverter.TryConvert("-99.99", out amount);
    Assert.IsTrue(accepted);
    Assert.AreEqual(-99.99m, amount);
}
263+
264+
/// <summary>
/// Checks that the default MoneyConverter accepts a dollar sign, a minus sign in front of the
/// symbol, and the parenthesised accounting form of a negative amount.
/// </summary>
[TestMethod]
public void TestMoneyConverterWithCurrencySymbols()
{
    var moneyConverter = MoneyConverter.Default;
    decimal amount;

    // Dollar sign prefix.
    Assert.IsTrue(moneyConverter.TryConvert("$123.45", out amount));
    Assert.AreEqual(123.45m, amount);

    // Minus sign placed before the dollar sign.
    Assert.IsTrue(moneyConverter.TryConvert("-$99.99", out amount));
    Assert.AreEqual(-99.99m, amount);

    // Accounting format: parentheses denote a negative amount.
    Assert.IsTrue(moneyConverter.TryConvert("($50.00)", out amount));
    Assert.AreEqual(-50.00m, amount);
}
281+
282+
/// <summary>
/// Checks that the default MoneyConverter accepts comma-grouped amounts carrying a dollar sign.
/// </summary>
[TestMethod]
public void TestMoneyConverterWithThousandsSeparator()
{
    var moneyConverter = MoneyConverter.Default;
    decimal amount;

    // One group separator.
    var accepted = moneyConverter.TryConvert("$1,234.56", out amount);
    Assert.IsTrue(accepted);
    Assert.AreEqual(1234.56m, amount);

    // Two group separators.
    accepted = moneyConverter.TryConvert("$1,234,567.89", out amount);
    Assert.IsTrue(accepted);
    Assert.AreEqual(1234567.89m, amount);
}
295+
296+
/// <summary>
/// Checks that the default MoneyConverter accepts exponent notation with positive and
/// negative exponents.
/// </summary>
[TestMethod]
public void TestMoneyConverterScientificNotation()
{
    var converter = MoneyConverter.Default;

    // NumberStyles.Currency on its own does NOT include AllowExponent (it is every style except
    // AllowExponent and AllowHexSpecifier), so these inputs can only parse if MoneyConverter
    // enables exponent support in addition to the currency style. This test pins that behavior.
    Assert.IsTrue(converter.TryConvert("1.5E3", out decimal result));
    Assert.AreEqual(1500m, result);

    Assert.IsTrue(converter.TryConvert("2.5E-2", out result));
    Assert.AreEqual(0.025m, result);
}
308+
309+
/// <summary>
/// Checks that the default MoneyConverter rejects non-numeric text, the empty string and null.
/// </summary>
[TestMethod]
public void TestMoneyConverterInvalidInput()
{
    var moneyConverter = MoneyConverter.Default;

    // Order matches the original assertions: text, empty string, null.
    string[] rejectedInputs = { "invalid", "", null };
    foreach (string candidate in rejectedInputs)
    {
        Assert.IsFalse(moneyConverter.TryConvert(candidate, out _));
    }
}
318+
319+
#endregion
320+
321+
#region Vector Converter Tests
322+
323+
/// <summary>
/// Checks that the default VectorConverter parses a bracketed, comma-separated list into floats.
/// </summary>
[TestMethod]
public void TestVectorConverterJsonArrayFormat()
{
    var vectorConverter = VectorConverter.Default;
    float[] expected = { 0.1f, 0.2f, 0.3f };

    // Bracketed (JSON array style) input.
    Assert.IsTrue(vectorConverter.TryConvert("[0.1, 0.2, 0.3]", out float[] actual));
    Assert.AreEqual(expected.Length, actual.Length);

    for (int index = 0; index < expected.Length; index++)
    {
        Assert.AreEqual(expected[index], actual[index], 0.0001f);
    }
}
335+
336+
/// <summary>
/// Checks that the default VectorConverter parses a comma-separated list without brackets.
/// </summary>
[TestMethod]
public void TestVectorConverterCommaSeparated()
{
    var vectorConverter = VectorConverter.Default;
    float[] expected = { 0.5f, 1.0f, 1.5f };

    // Same element syntax as the bracketed form, minus the surrounding brackets.
    Assert.IsTrue(vectorConverter.TryConvert("0.5, 1.0, 1.5", out float[] actual));
    Assert.AreEqual(expected.Length, actual.Length);

    for (int index = 0; index < expected.Length; index++)
    {
        Assert.AreEqual(expected[index], actual[index], 0.0001f);
    }
}
348+
349+
/// <summary>
/// Checks that the default VectorConverter parses elements written in exponent notation.
/// </summary>
[TestMethod]
public void TestVectorConverterScientificNotation()
{
    var vectorConverter = VectorConverter.Default;
    float[] parsed;

    // Mixed-case exponent markers, a negative exponent and a negative mantissa.
    var accepted = vectorConverter.TryConvert("[1.5e-3, 2.0E2, -3.5e1]", out parsed);
    Assert.IsTrue(accepted);
    Assert.AreEqual(3, parsed.Length);

    // The first element is small, so it is compared with a tighter tolerance than the others.
    Assert.AreEqual(0.0015f, parsed[0], 0.000001f);
    Assert.AreEqual(200.0f, parsed[1], 0.0001f);
    Assert.AreEqual(-35.0f, parsed[2], 0.0001f);
}
361+
362+
/// <summary>
/// Checks that the default VectorConverter parses a vector whose elements are all negative.
/// </summary>
[TestMethod]
public void TestVectorConverterNegativeValues()
{
    var vectorConverter = VectorConverter.Default;
    float[] expected = { -0.5f, -1.0f, -1.5f };

    // Every element carries a leading minus sign.
    Assert.IsTrue(vectorConverter.TryConvert("[-0.5, -1.0, -1.5]", out float[] actual));
    Assert.AreEqual(expected.Length, actual.Length);

    for (int index = 0; index < expected.Length; index++)
    {
        Assert.AreEqual(expected[index], actual[index], 0.0001f);
    }
}
374+
375+
/// <summary>
/// Checks that a generated 100-element bracketed vector parses into 100 floats with the
/// expected first and last values.
/// </summary>
[TestMethod]
public void TestVectorConverterLargeEmbedding()
{
    var converter = VectorConverter.Default;

    // Real embeddings are larger (e.g., OpenAI ada-002 uses 1536 dimensions); this test uses
    // a generated 100-dimension sample.
    // The elements are formatted with the invariant culture: a culture-sensitive ToString("F3")
    // would emit ',' as the decimal separator under cultures such as de-DE, which collides with
    // the ", " element separator and makes the test depend on the machine's locale.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var elements = Enumerable.Range(0, 100).Select(i => (i * 0.01f).ToString("F3", invariant));
    string vectorString = "[" + string.Join(", ", elements) + "]";

    Assert.IsTrue(converter.TryConvert(vectorString, out float[] result));
    Assert.AreEqual(100, result.Length);
    Assert.AreEqual(0.0f, result[0], 0.0001f);
    Assert.AreEqual(0.99f, result[99], 0.0001f);
}
389+
390+
/// <summary>
/// Checks that the default VectorConverter tolerates extra whitespace around brackets,
/// elements and separators, and also accepts input with no whitespace at all.
/// </summary>
[TestMethod]
public void TestVectorConverterWhitespaceHandling()
{
    var converter = VectorConverter.Default;
    float[] expected = { 0.1f, 0.2f, 0.3f };

    // Padding outside the brackets, inside the brackets and around every separator.
    Assert.IsTrue(converter.TryConvert(" [ 0.1 , 0.2 , 0.3 ] ", out float[] result));
    Assert.AreEqual(3, result.Length);
    // Also verify the values, not only the count, so padding cannot silently corrupt an element.
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], result[i], 0.0001f);
    }

    // No spaces at all.
    Assert.IsTrue(converter.TryConvert("0.1,0.2,0.3", out result));
    Assert.AreEqual(3, result.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], result[i], 0.0001f);
    }
}
402+
403+
/// <summary>
/// Checks that the default VectorConverter rejects empty, null, element-less, non-numeric
/// and malformed input.
/// </summary>
[TestMethod]
public void TestVectorConverterInvalidInput()
{
    var vectorConverter = VectorConverter.Default;

    // Order matches the original assertions.
    string[] rejectedInputs =
    {
        "",
        null,
        "[]",                  // Empty array
        "[not, a, number]",
        "[0.1, invalid, 0.3]",
        "["                    // Malformed
    };

    foreach (string candidate in rejectedInputs)
    {
        Assert.IsFalse(vectorConverter.TryConvert(candidate, out _));
    }
}
416+
417+
/// <summary>
/// Checks that the default VectorConverter parses a bracketed vector holding a single element.
/// </summary>
[TestMethod]
public void TestVectorConverterSingleValue()
{
    var vectorConverter = VectorConverter.Default;
    float[] parsed;

    // One element, no separators.
    var accepted = vectorConverter.TryConvert("[42.5]", out parsed);
    Assert.IsTrue(accepted);
    Assert.AreEqual(1, parsed.Length);
    Assert.AreEqual(42.5f, parsed[0], 0.0001f);
}
427+
164428
#endregion
165429

166430
#region Type Converter Registry Tests

0 commit comments

Comments
 (0)