#!/usr/bin/env node
/**
 * Adds internal links to feature pages across MDX content files.
 *
 * Rules:
 * - Only links the FIRST mention of each feature per file
 * - Skips code blocks, inline code, existing links, headings, JSX tags, imports
 * - Skips if the feature URL is already linked somewhere on the page
 * - Skips "data retention" (not about the retention feature)
 * - Adds `updated: YYYY-MM-DD` to frontmatter of modified articles & guides
 */
|
|
|
|
import fs from 'fs';
|
|
import path from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
|
|
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// The script's parent directory is used as the root that report paths are relative to.
const ROOT = path.join(__dirname, '..');
// Base directory that the scanned content directories are resolved against.
const CONTENT_DIR = path.resolve(ROOT, 'apps/public/content');
// Date written into the `updated` frontmatter field. NOTE: this is a fixed
// literal, not the current date, so re-running the script later stamps this same day.
const TODAY = '2026-02-07';
|
|
|
|
// ── Feature definitions ─────────────────────────────────────────────
// Each row below is [slug, patterns, guards?] and expands to
// { slug, url: '/features/<slug>', patterns, ...guards }.
//
// Patterns are tried in order; first match wins for each feature.
// Longer / more specific patterns come first to avoid partial matches.
// Optional guards:
//   excludeBefore – words that, directly before a match (plus a space), veto it
//   excludeAfter  – words that, directly after a match (after a space), veto it
const FEATURES = [
  ['event-tracking', ['event tracking']],
  ['session-tracking', ['session tracking']],
  ['revenue-tracking', ['revenue tracking']],
  ['data-visualization', ['data visualization']],
  ['identify-users', ['identify users', 'user identification']],
  ['web-analytics', ['web analytics']],
  // "conversion funnel(s)" links to funnels, not conversion
  [
    'funnels',
    ['conversion funnels', 'conversion funnel', 'funnel analysis', 'funnels', 'funnel'],
  ],
  // "retention" alone is included but guarded by excludeBefore / excludeAfter
  [
    'retention',
    ['retention analysis', 'user retention', 'retention rates', 'retention rate', 'retention'],
    {
      excludeBefore: ['data', 'unlimited'], // skip "data retention", "unlimited retention"
      excludeAfter: ['period', 'policy', 'limit', 'of data'],
    },
  ],
  [
    'conversion',
    [
      'conversion tracking',
      'conversion rates',
      'conversion rate',
      'conversion paths',
      'conversions',
      'conversion',
    ],
    { excludeBefore: ['data'] },
  ],
].map(([slug, patterns, guards = {}]) => ({
  slug,
  url: `/features/${slug}`,
  patterns,
  ...guards,
}));
|
|
|
|
// Only these dirs get the `updated` frontmatter field
const DIRS_WITH_UPDATED = ['articles', 'guides'];
// Directories to scan (relative to CONTENT_DIR): the stamped ones first, then the rest
const DIRS = [...DIRS_WITH_UPDATED, 'docs', 'pages'];
|
|
|
|
// ── Helpers ──────────────────────────────────────────────────────────
|
|
|
|
/**
 * Collect every span of `text` that must be left untouched when links are inserted.
 *
 * @param {string} text - Full contents of one MDX file.
 * @returns {{ start: number, end: number }[]} Half-open [start, end) character
 *   ranges. They are grouped by pattern in the order listed below, not sorted
 *   by position, and ranges from different patterns may overlap.
 */
function getSkipZones(text) {
  const protectedPatterns = [
    /```[\s\S]*?```/g, // fenced code blocks ```…```
    /`[^`\n]+`/g, // inline code `…`
    /\[[^\]]*\]\([^)]*\)/g, // existing markdown links [text](url)
    /^#{1,6}\s+.+$/gm, // headings # … (entire line)
    /<[^>]+>/g, // JSX / HTML tags (attributes may contain feature words)
    /^import\s+.+$/gm, // import statements
    /^---[\s\S]*?---/, // frontmatter block (anchored to the start of the file)
    /^\|.+\|$/gm, // markdown table rows (| … |)
    /^>\s.*\[.*\]\(.*\).*$/gm, // > blockquote lines that contain links
  ];

  return protectedPatterns.flatMap((pattern) => {
    // matchAll requires the g flag; the one non-global pattern yields at most one hit.
    const hits = pattern.global
      ? Array.from(text.matchAll(pattern))
      : [pattern.exec(text)].filter(Boolean);
    return hits.map((hit) => ({ start: hit.index, end: hit.index + hit[0].length }));
  });
}
|
|
|
|
/**
 * Tell whether the span [pos, pos + len) intersects any protected zone.
 *
 * @param {number} pos - Start index of the candidate span.
 * @param {number} len - Length of the candidate span.
 * @param {{ start: number, end: number }[]} zones - Half-open protected ranges.
 * @returns {boolean} True if at least one zone overlaps the span; spans that
 *   merely touch a zone boundary do not count as overlapping.
 */
function overlapsSkipZone(pos, len, zones) {
  const spanEnd = pos + len;
  for (const zone of zones) {
    if (pos < zone.end && spanEnd > zone.start) {
      return true;
    }
  }
  return false;
}
|
|
|
|
/**
 * Backslash-escape every regular-expression metacharacter in `s` so the
 * result can be embedded in a RegExp source and match `s` literally.
 *
 * @param {string} s - Literal text.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRegex(s) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, (ch) => `\\${ch}`);
}
|
|
|
|
// ── Core processing ──────────────────────────────────────────────────
|
|
|
|
/**
 * Link one eligible mention of each feature in a single MDX file and, if
 * anything changed, write the file back to disk.
 *
 * For every entry in FEATURES the file is skipped when it already contains the
 * feature URL. Otherwise the feature's patterns are tried in order and the
 * first occurrence that is outside every skip zone and not vetoed by the
 * feature's excludeBefore / excludeAfter words is wrapped in a markdown link.
 *
 * @param {string} filePath - Path of the MDX file to read and rewrite.
 * @param {string} dir - Content directory name the file came from; files whose
 *   dir is listed in DIRS_WITH_UPDATED also get `updated: <TODAY>` in their
 *   frontmatter.
 * @returns {{ feature: string, matched: string }[] | null} One entry per link
 *   added, or null when the file was left untouched.
 */
function processFile(filePath, dir) {
  let content = fs.readFileSync(filePath, 'utf8');

  const skipZones = getSkipZones(content);
  const changes = [];

  for (const feature of FEATURES) {
    // If the file already links to this feature URL, skip entirely
    if (content.includes(feature.url)) {
      continue;
    }

    let linked = false;

    for (const pattern of feature.patterns) {
      if (linked) {
        break;
      }

      const re = new RegExp(`\\b${escapeRegex(pattern)}\\b`, 'gi');
      let m;

      while ((m = re.exec(content))) {
        const matchEnd = m.index + m[0].length;

        // In a skip zone?
        if (overlapsSkipZone(m.index, m[0].length, skipZones)) {
          continue;
        }

        // Check excludeBefore / excludeAfter against the 20 characters on either side
        if (feature.excludeBefore) {
          const before = content.slice(Math.max(0, m.index - 20), m.index).toLowerCase();
          if (feature.excludeBefore.some((w) => before.endsWith(`${w} `))) {
            continue;
          }
        }
        if (feature.excludeAfter) {
          const after = content.slice(matchEnd, matchEnd + 20).toLowerCase();
          if (feature.excludeAfter.some((w) => after.startsWith(` ${w}`))) {
            continue;
          }
        }

        // Build the replacement from feature.url so the inserted link always
        // agrees with the "already linked" check at the top of the loop.
        const replacement = `[${m[0]}](${feature.url})`;
        content = content.slice(0, m.index) + replacement + content.slice(matchEnd);

        // Shift every zone located after the match. `>=` matters: a zone that
        // begins exactly where the match ended (e.g. a tag glued to the word)
        // has moved too and would otherwise lose its protection.
        const lenDiff = replacement.length - m[0].length;
        for (const z of skipZones) {
          if (z.start >= matchEnd) {
            z.start += lenDiff;
            z.end += lenDiff;
          }
        }
        // Protect the new link so later features cannot match inside it.
        skipZones.push({ start: m.index, end: m.index + replacement.length });

        changes.push({ feature: feature.slug, matched: m[0] });
        linked = true;
        break;
      }
    }
  }

  if (changes.length === 0) {
    return null;
  }

  // Add / update the `updated` frontmatter field for articles & guides
  if (DIRS_WITH_UPDATED.includes(dir)) {
    const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
    if (fmMatch) {
      let fm = fmMatch[1];
      // `.*` (not `\s*.+`) keeps each replacement on its own line and also
      // handles a key whose value is empty.
      if (/^updated:/m.test(fm)) {
        fm = fm.replace(/^updated:.*$/m, `updated: ${TODAY}`);
      } else if (/^date:/m.test(fm)) {
        fm = fm.replace(/^(date:.*)$/m, `$1\nupdated: ${TODAY}`);
      } else {
        fm += `\nupdated: ${TODAY}`;
      }
      // The match is anchored at index 0, so splice by position instead of
      // String.replace, which would interpret `$$`, `$&`, … inside the frontmatter.
      content = `---\n${fm}\n---${content.slice(fmMatch[0].length)}`;
    }
  }

  fs.writeFileSync(filePath, content, 'utf8');
  return changes;
}
|
|
|
|
// ── Walk directories ─────────────────────────────────────────────────
|
|
|
|
/**
 * Recursively list every `.mdx` file below a directory.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths (each built by joining onto `dir`) in directory-listing
 *   order, with a subdirectory's files appearing at that subdirectory's position.
 */
function walk(dir) {
  return fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
    const entryPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      return walk(entryPath);
    }
    return entry.name.endsWith('.mdx') ? [entryPath] : [];
  });
}
|
|
|
|
// ── Main ─────────────────────────────────────────────────────────────
|
|
|
|
// Process each configured content directory in order, skipping any that do not
// exist, and keep one record per file that processFile actually modified.
const results = DIRS.flatMap((dir) => {
  const dirPath = path.join(CONTENT_DIR, dir);
  if (!fs.existsSync(dirPath)) {
    return [];
  }

  const modified = [];
  for (const file of walk(dirPath)) {
    const changes = processFile(file, dir);
    if (changes) {
      modified.push({ file: path.relative(ROOT, file), changes });
    }
  }
  return modified;
});

// Summary first, then one line per modified file followed by its added links.
console.log('=== Internal Linking Report ===\n');
console.log(`Total files modified: ${results.length}`);
console.log(`Total links added: ${results.flatMap((r) => r.changes).length}\n`);

results.forEach(({ file, changes }) => {
  console.log(` ${file}`);
  changes.forEach(({ matched, feature }) => {
    console.log(` -> "${matched}" => /features/${feature}`);
  });
});
console.log('\nDone.');
|