public: feature pages

This commit is contained in:
Carl-Gerhard Lindesvärd
2026-02-07 16:42:02 +00:00
parent ed8b5c667e
commit 6ce9b5dd1b
127 changed files with 3140 additions and 81 deletions

View File

@@ -0,0 +1,258 @@
#!/usr/bin/env node
/**
* Adds internal links to feature pages across MDX content files.
*
* Rules:
* - Only links the FIRST mention of each feature per file
* - Skips code blocks, inline code, existing links, headings, JSX tags, imports
* - Skips if the feature URL is already linked somewhere on the page
* - Skips "data retention" (not about the retention feature)
* - Adds `updated: YYYY-MM-DD` to frontmatter of modified articles & guides
*/
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const CONTENT_DIR = path.join(ROOT, 'apps/public/content');
const TODAY = '2026-02-07';
// ── Feature definitions ─────────────────────────────────────────────
// Patterns are tried in order; first match wins for each feature.
// Longer / more specific patterns come first to avoid partial matches.
const FEATURES = [
{
slug: 'event-tracking',
url: '/features/event-tracking',
patterns: ['event tracking'],
},
{
slug: 'session-tracking',
url: '/features/session-tracking',
patterns: ['session tracking'],
},
{
slug: 'revenue-tracking',
url: '/features/revenue-tracking',
patterns: ['revenue tracking'],
},
{
slug: 'data-visualization',
url: '/features/data-visualization',
patterns: ['data visualization'],
},
{
slug: 'identify-users',
url: '/features/identify-users',
patterns: ['identify users', 'user identification'],
},
{
slug: 'web-analytics',
url: '/features/web-analytics',
patterns: ['web analytics'],
},
{
slug: 'funnels',
url: '/features/funnels',
// "conversion funnel(s)" links to funnels, not conversion
patterns: ['conversion funnels', 'conversion funnel', 'funnel analysis', 'funnels', 'funnel'],
},
{
slug: 'retention',
url: '/features/retention',
// "retention" alone is included but guarded by excludeBefore
patterns: ['retention analysis', 'user retention', 'retention rates', 'retention rate', 'retention'],
excludeBefore: ['data', 'unlimited'], // skip "data retention", "unlimited retention"
excludeAfter: ['period', 'policy', 'limit', 'of data'],
},
{
slug: 'conversion',
url: '/features/conversion',
patterns: ['conversion tracking', 'conversion rates', 'conversion rate', 'conversion paths', 'conversions', 'conversion'],
excludeBefore: ['data'],
},
];
// Directories to scan (relative to CONTENT_DIR)
const DIRS = ['articles', 'guides', 'docs', 'pages'];
// Only these dirs get the `updated` frontmatter field
const DIRS_WITH_UPDATED = ['articles', 'guides'];
// ── Helpers ──────────────────────────────────────────────────────────
/** Return an array of { start, end } ranges that should NOT be modified. */
function getSkipZones(text) {
const zones = [];
let m;
// Fenced code blocks ```…```
const codeBlock = /```[\s\S]*?```/g;
while ((m = codeBlock.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
// Inline code `…`
const inlineCode = /`[^`\n]+`/g;
while ((m = inlineCode.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
// Existing markdown links [text](url)
const mdLink = /\[[^\]]*\]\([^)]*\)/g;
while ((m = mdLink.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
// Headings # … (entire line)
const heading = /^#{1,6}\s+.+$/gm;
while ((m = heading.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
// JSX / HTML tags (attributes may contain feature words)
const jsxTag = /<[^>]+>/g;
while ((m = jsxTag.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
// import statements
const imp = /^import\s+.+$/gm;
while ((m = imp.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
// Frontmatter block
const fm = /^---[\s\S]*?---/;
if ((m = fm.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
// Markdown table rows (| … |)
const tableRow = /^\|.+\|$/gm;
while ((m = tableRow.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
// > blockquote lines that contain links
const bqLink = /^>\s.*\[.*\]\(.*\).*$/gm;
while ((m = bqLink.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
return zones;
}
function overlapsSkipZone(pos, len, zones) {
const end = pos + len;
return zones.some((z) => !(end <= z.start || pos >= z.end));
}
function escapeRegex(s) {
return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
// ── Core processing ──────────────────────────────────────────────────
function processFile(filePath, dir) {
let content = fs.readFileSync(filePath, 'utf8');
const skipZones = getSkipZones(content);
const changes = [];
for (const feature of FEATURES) {
// If the file already links to this feature URL, skip entirely
if (content.includes(feature.url)) continue;
let linked = false;
for (const pattern of feature.patterns) {
if (linked) break;
const re = new RegExp(`\\b${escapeRegex(pattern)}\\b`, 'gi');
let m;
while ((m = re.exec(content))) {
// In a skip zone?
if (overlapsSkipZone(m.index, m[0].length, skipZones)) continue;
// Check excludeBefore / excludeAfter
if (feature.excludeBefore) {
const before = content.slice(Math.max(0, m.index - 20), m.index).toLowerCase();
if (feature.excludeBefore.some((w) => before.endsWith(w + ' '))) continue;
}
if (feature.excludeAfter) {
const after = content.slice(m.index + m[0].length, m.index + m[0].length + 20).toLowerCase();
if (feature.excludeAfter.some((w) => after.startsWith(' ' + w))) continue;
}
// Build replacement
const replacement = `[${m[0]}](/features/${feature.slug})`;
content =
content.slice(0, m.index) +
replacement +
content.slice(m.index + m[0].length);
// Add the new link as a skip zone and shift all subsequent zones
const lenDiff = replacement.length - m[0].length;
skipZones.push({ start: m.index, end: m.index + replacement.length });
for (const z of skipZones) {
if (z.start > m.index + m[0].length) {
z.start += lenDiff;
z.end += lenDiff;
}
}
changes.push({ feature: feature.slug, matched: m[0] });
linked = true;
break;
}
}
}
if (changes.length === 0) return null;
// Add / update the `updated` frontmatter field for articles & guides
if (DIRS_WITH_UPDATED.includes(dir)) {
const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
if (fmMatch) {
let fm = fmMatch[1];
if (/^updated:/m.test(fm)) {
fm = fm.replace(/^updated:\s*.+$/m, `updated: ${TODAY}`);
} else if (/^date:/m.test(fm)) {
fm = fm.replace(/^(date:\s*.+)$/m, `$1\nupdated: ${TODAY}`);
} else {
fm += `\nupdated: ${TODAY}`;
}
content = content.replace(fmMatch[0], `---\n${fm}\n---`);
}
}
fs.writeFileSync(filePath, content, 'utf8');
return changes;
}
// ── Walk directories ─────────────────────────────────────────────────
function walk(dir) {
const entries = fs.readdirSync(dir, { withFileTypes: true });
const files = [];
for (const e of entries) {
const full = path.join(dir, e.name);
if (e.isDirectory()) files.push(...walk(full));
else if (e.name.endsWith('.mdx')) files.push(full);
}
return files;
}
// ── Main ─────────────────────────────────────────────────────────────
const results = [];
for (const dir of DIRS) {
const dirPath = path.join(CONTENT_DIR, dir);
if (!fs.existsSync(dirPath)) continue;
for (const file of walk(dirPath)) {
const changes = processFile(file, dir);
if (changes) {
results.push({ file: path.relative(ROOT, file), changes });
}
}
}
console.log('=== Internal Linking Report ===\n');
console.log(`Total files modified: ${results.length}`);
console.log(`Total links added: ${results.reduce((s, r) => s + r.changes.length, 0)}\n`);
for (const r of results) {
console.log(` ${r.file}`);
for (const c of r.changes) {
console.log(` -> "${c.matched}" => /features/${c.feature}`);
}
}
console.log('\nDone.');