Skip to content

Commit 0b60125

Browse files
authored
[typespec-metadata] Group emitter output by language (#4219)
Allow more than one set of emitter metadata per language. See: Azure/azure-sdk-tools#14975
1 parent 98f1b4b commit 0b60125

7 files changed

Lines changed: 320 additions & 129 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
changeKind: internal
3+
packages:
4+
- "@azure-tools/typespec-metadata"
5+
---
6+
7+
Dummy

packages/typespec-metadata/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# @azure-tools/typespec-metadata
22

3+
## 0.2.0
4+
- Group emitters by normalized language key in the `languages` output. Each language key now maps to an array of emitter configs instead of a single config, allowing multiple emitters per language (e.g. two C# emitters). The language is inferred from the emitter package name by heuristic keyword matching; emitters whose language cannot be inferred are grouped under `"unknown"`.
5+
6+
37
## 0.1.3
48
- Fixes issue with azurev2 flavored package names.
59

packages/typespec-metadata/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@azure-tools/typespec-metadata",
3-
"version": "0.1.3",
3+
"version": "0.2.0",
44
"description": "TypeSpec emitter that produces structured metadata snapshots for APIView and other tooling.",
55
"type": "module",
66
"license": "MIT",

packages/typespec-metadata/src/collector.ts

Lines changed: 47 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -59,23 +59,34 @@ function extractParameters(
5959
return params;
6060
}
6161

62-
interface EmitterRegistration {
63-
language: string;
64-
parser: LanguageParser;
65-
}
66-
67-
const EMITTER_REGISTRY: Record<string, EmitterRegistration> = {
68-
"@azure-tools/typespec-csharp": { language: "csharp", parser: parseCSharp },
69-
"@azure-tools/typespec-java": { language: "java", parser: parseJava },
70-
"@azure-tools/typespec-python": { language: "python", parser: parsePython },
71-
"@azure-tools/typespec-ts": { language: "typescript", parser: parseTypeScript },
72-
"@azure-tools/typespec-go": { language: "go", parser: parseGo },
73-
"@azure-tools/typespec-rust": { language: "rust", parser: parseRust },
74-
"@azure-typespec/http-client-csharp": { language: "http-client-csharp", parser: parseCSharp },
75-
"@azure-typespec/http-client-csharp-mgmt": {
76-
language: "http-client-csharp-mgmt",
77-
parser: parseCSharp,
78-
},
62+
/**
63+
* Heuristic patterns used to infer a normalized language key from an emitter
64+
* package name.
65+
*
66+
* Order matters: more specific patterns (e.g. "javascript") must precede
67+
* shorter ones (e.g. "java") to avoid false positives ("javascript" matching
68+
* "java").
69+
*/
70+
const LANGUAGE_HEURISTICS: Array<[RegExp, string]> = [
71+
[/csharp/i, "csharp"],
72+
[/python/i, "python"],
73+
[/typescript/i, "typescript"],
74+
[/\bts\b/i, "typescript"],
75+
[/javascript/i, "javascript"],
76+
[/java(?!script)/i, "java"],
77+
[/\brust\b/i, "rust"],
78+
[/\bswift\b/i, "swift"],
79+
[/\bgo\b/i, "go"],
80+
];
81+
82+
/** Maps a normalized language key to its parser so heuristic matches can reuse language-specific logic. */
83+
const LANGUAGE_PARSERS: Record<string, LanguageParser> = {
84+
csharp: parseCSharp,
85+
java: parseJava,
86+
python: parsePython,
87+
typescript: parseTypeScript,
88+
go: parseGo,
89+
rust: parseRust,
7990
};
8091

8192
interface LanguageParserResult {
@@ -300,7 +311,7 @@ function parseRust(
300311
}
301312

302313
export interface LanguageCollectionResult {
303-
languages: Record<string, LanguagePackageMetadata>;
314+
languages: Record<string, LanguagePackageMetadata[]>;
304315
sourceConfigPath?: string;
305316
}
306317

@@ -469,8 +480,8 @@ export function buildLanguageMetadata(
469480
params: Record<string, unknown>,
470481
baseOutputDir: string,
471482
defaultServiceDir?: string,
472-
): Record<string, LanguagePackageMetadata> {
473-
const languagesDict: Record<string, LanguagePackageMetadata> = {};
483+
): Record<string, LanguagePackageMetadata[]> {
484+
const languagesDict: Record<string, LanguagePackageMetadata[]> = {};
474485

475486
for (const [emitterName, emitterOptions] of Object.entries(optionMap)) {
476487
const metadata = createLanguageMetadata(
@@ -481,7 +492,10 @@ export function buildLanguageMetadata(
481492
defaultServiceDir,
482493
);
483494
const language = inferLanguageFromEmitterName(emitterName);
484-
languagesDict[language] = metadata;
495+
if (!languagesDict[language]) {
496+
languagesDict[language] = [];
497+
}
498+
languagesDict[language].push(metadata);
485499
}
486500

487501
return languagesDict;
@@ -509,11 +523,11 @@ function createLanguageMetadata(
509523
let packageName: string | undefined;
510524
let namespace: string | undefined;
511525

512-
const normalizedEmitterName = emitterName.toLowerCase();
513-
const registration = EMITTER_REGISTRY[normalizedEmitterName];
514-
515-
if (registration) {
516-
const result = registration.parser(normalizedOptions, params);
526+
// Use heuristic language match to pick a language-specific parser
527+
const heuristicLang = inferLanguageFromEmitterName(emitterName);
528+
const heuristicParser = LANGUAGE_PARSERS[heuristicLang];
529+
if (heuristicParser) {
530+
const result = heuristicParser(normalizedOptions, params);
517531
packageName = result.packageName;
518532
namespace = result.namespace;
519533
} else {
@@ -591,12 +605,15 @@ function normalizeKey(key: string): string {
591605

592606
export function inferLanguageFromEmitterName(emitterName: string): string {
593607
const normalized = emitterName.toLowerCase();
594-
const registration = EMITTER_REGISTRY[normalized];
595-
if (registration) {
596-
return registration.language;
608+
609+
// Heuristic: scan the emitter name for known language keywords
610+
for (const [pattern, language] of LANGUAGE_HEURISTICS) {
611+
if (pattern.test(normalized)) {
612+
return language;
613+
}
597614
}
598615

599-
return emitterName;
616+
return "unknown";
600617
}
601618

602619
function trimOrUndefined(value: string | undefined): string | undefined {

packages/typespec-metadata/src/metadata.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ export interface MetadataSnapshot {
2929
generatedAt: string;
3030
/** TypeSpec-level metadata (namespace, documentation, type). */
3131
typespec: TypeSpecMetadata;
32-
/** Per-language package metadata extracted from tspconfig, keyed by language. */
33-
languages: Record<string, LanguagePackageMetadata>;
32+
/** Per-language package metadata extracted from tspconfig, keyed by normalized language name. Each language maps to an array of emitter configs. Emitters that cannot be linked to a known language are grouped under "unknown". */
33+
languages: Record<string, LanguagePackageMetadata[]>;
3434
/** Absolute tspconfig path when available. */
3535
sourceConfigPath?: string;
3636
}

0 commit comments

Comments
 (0)