public: feature pages
This commit is contained in:
258
scripts/add-internal-links.mjs
Normal file
258
scripts/add-internal-links.mjs
Normal file
@@ -0,0 +1,258 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Adds internal links to feature pages across MDX content files.
|
||||
*
|
||||
* Rules:
|
||||
* - Only links the FIRST mention of each feature per file
|
||||
* - Skips code blocks, inline code, existing links, headings, JSX tags, imports
|
||||
* - Skips if the feature URL is already linked somewhere on the page
|
||||
* - Skips "data retention" (not about the retention feature)
|
||||
* - Adds `updated: YYYY-MM-DD` to frontmatter of modified articles & guides
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
// Resolve paths relative to this script so it can be run from any cwd.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const CONTENT_DIR = path.join(ROOT, 'apps/public/content');
// Date written into the `updated` frontmatter field of modified files.
const TODAY = '2026-02-07';

// ── Feature definitions ─────────────────────────────────────────────
// Patterns are tried in order; first match wins for each feature.
// Longer / more specific patterns come first to avoid partial matches.
//
// Optional guards (compared case-insensitively against the surrounding text):
//   excludeBefore: skip a match directly preceded by `<word> `
//   excludeAfter:  skip a match directly followed by ` <word>`
const FEATURES = [
  {
    slug: 'event-tracking',
    url: '/features/event-tracking',
    patterns: ['event tracking'],
  },
  {
    slug: 'session-tracking',
    url: '/features/session-tracking',
    patterns: ['session tracking'],
  },
  {
    slug: 'revenue-tracking',
    url: '/features/revenue-tracking',
    patterns: ['revenue tracking'],
  },
  {
    slug: 'data-visualization',
    url: '/features/data-visualization',
    patterns: ['data visualization'],
  },
  {
    slug: 'identify-users',
    url: '/features/identify-users',
    patterns: ['identify users', 'user identification'],
  },
  {
    slug: 'web-analytics',
    url: '/features/web-analytics',
    patterns: ['web analytics'],
  },
  {
    slug: 'funnels',
    url: '/features/funnels',
    // "conversion funnel(s)" links to funnels, not conversion
    patterns: ['conversion funnels', 'conversion funnel', 'funnel analysis', 'funnels', 'funnel'],
  },
  {
    slug: 'retention',
    url: '/features/retention',
    // "retention" alone is included but guarded by excludeBefore
    patterns: ['retention analysis', 'user retention', 'retention rates', 'retention rate', 'retention'],
    excludeBefore: ['data', 'unlimited'], // skip "data retention", "unlimited retention"
    excludeAfter: ['period', 'policy', 'limit', 'of data'],
  },
  {
    slug: 'conversion',
    url: '/features/conversion',
    patterns: ['conversion tracking', 'conversion rates', 'conversion rate', 'conversion paths', 'conversions', 'conversion'],
    excludeBefore: ['data'],
    // Never link the "conversion" in "conversion funnel(s)": that phrase belongs
    // to the funnels feature. Without this guard it would be linked here whenever
    // the funnels feature is skipped because the page already links to it.
    excludeAfter: ['funnel'],
  },
];

// Directories to scan (relative to CONTENT_DIR)
const DIRS = ['articles', 'guides', 'docs', 'pages'];
// Only these dirs get the `updated` frontmatter field
const DIRS_WITH_UPDATED = ['articles', 'guides'];
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Collect the character ranges of `text` that must be left untouched.
 *
 * Each protected construct is described by one regular expression; every hit
 * becomes a `{ start, end }` range (end exclusive). Ranges are emitted pattern
 * by pattern, in the order listed below, and may overlap one another.
 *
 * @param {string} text - Full file content.
 * @returns {{ start: number, end: number }[]} Ranges that should not be modified.
 */
function getSkipZones(text) {
  const protectedPatterns = [
    /```[\s\S]*?```/g, // fenced code blocks ```…```
    /`[^`\n]+`/g, // inline code `…`
    /\[[^\]]*\]\([^)]*\)/g, // existing markdown links [text](url)
    /^#{1,6}\s+.+$/gm, // headings (entire line)
    /<[^>]+>/g, // JSX / HTML tags (attributes may contain feature words)
    /^import\s+.+$/gm, // import statements
    /^---[\s\S]*?---/, // frontmatter block (at most one, at the very start)
    /^\|.+\|$/gm, // markdown table rows (| … |)
    /^>\s.*\[.*\]\(.*\).*$/gm, // blockquote lines that contain links
  ];

  return protectedPatterns.flatMap((pattern) => {
    // matchAll() requires the global flag; the frontmatter pattern is matched once.
    const hits = pattern.global
      ? [...text.matchAll(pattern)]
      : [pattern.exec(text)].filter((hit) => hit !== null);

    return hits.map(({ index, 0: matched }) => ({
      start: index,
      end: index + matched.length,
    }));
  });
}
|
||||
|
||||
/**
 * Tell whether the span `[pos, pos + len)` intersects any skip zone.
 *
 * Zones are half-open: a span that merely touches a zone's edge does not overlap it.
 *
 * @param {number} pos - Start offset of the span.
 * @param {number} len - Length of the span.
 * @param {{ start: number, end: number }[]} zones - Protected ranges.
 * @returns {boolean} True when at least one zone intersects the span.
 */
function overlapsSkipZone(pos, len, zones) {
  const spanEnd = pos + len;
  for (const zone of zones) {
    if (spanEnd > zone.start && pos < zone.end) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Escape every regular-expression metacharacter in `s` so the result can be
 * embedded in a `RegExp` source and match `s` literally.
 *
 * @param {string} s - Literal text.
 * @returns {string} `s` with each metacharacter prefixed by a backslash.
 */
function escapeRegex(s) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, (ch) => `\\${ch}`);
}
|
||||
|
||||
// ── Core processing ──────────────────────────────────────────────────
|
||||
|
||||
/**
 * Find the first mention of `feature` in `content` that may be turned into a link.
 *
 * Patterns are tried in order; within a pattern, matches are scanned left to
 * right. A match is rejected when it overlaps a skip zone or trips one of the
 * feature's `excludeBefore` / `excludeAfter` guards.
 *
 * @returns {{ index: number, text: string } | null} The accepted match, or null.
 */
function findLinkableMention(content, feature, skipZones) {
  for (const pattern of feature.patterns) {
    const re = new RegExp(`\\b${escapeRegex(pattern)}\\b`, 'gi');
    let m;

    while ((m = re.exec(content))) {
      const start = m.index;
      const end = start + m[0].length;

      if (overlapsSkipZone(start, m[0].length, skipZones)) continue;

      if (feature.excludeBefore) {
        const before = content.slice(Math.max(0, start - 20), start).toLowerCase();
        if (feature.excludeBefore.some((w) => before.endsWith(`${w} `))) continue;
      }
      if (feature.excludeAfter) {
        const after = content.slice(end, end + 20).toLowerCase();
        if (feature.excludeAfter.some((w) => after.startsWith(` ${w}`))) continue;
      }

      return { index: start, text: m[0] };
    }
  }
  return null;
}

/**
 * Return `content` with `updated: TODAY` set in its leading frontmatter block.
 * Content without a `---` frontmatter block is returned unchanged.
 */
function withUpdatedDate(content) {
  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
  if (!fmMatch) return content;

  let fm = fmMatch[1];
  // `.*` stays on one line, so an empty value never swallows the next field.
  // Function replacers keep `$` sequences from being treated as patterns.
  if (/^updated:/m.test(fm)) {
    fm = fm.replace(/^updated:.*$/m, () => `updated: ${TODAY}`);
  } else if (/^date:/m.test(fm)) {
    fm = fm.replace(/^date:.*$/m, (line) => `${line}\nupdated: ${TODAY}`);
  } else {
    fm += `\nupdated: ${TODAY}`;
  }

  // The match is anchored at offset 0, so splice by position instead of using
  // String.prototype.replace: frontmatter text containing `$'`, `$&` or `$$`
  // would otherwise be expanded as a replacement pattern and corrupt the file.
  return `---\n${fm}\n---${content.slice(fmMatch[0].length)}`;
}

/**
 * Link the first eligible mention of each feature in one file, rewriting the
 * file in place when anything changed.
 *
 * A feature is skipped entirely when its URL already appears in the file.
 * For directories listed in DIRS_WITH_UPDATED the `updated` frontmatter field
 * is set to TODAY on modified files.
 *
 * @param {string} filePath - Path of the MDX file to process.
 * @param {string} dir - Top-level content directory the file was found under.
 * @returns {{ feature: string, matched: string }[] | null} The links added, or
 *   null when the file was left untouched.
 */
function processFile(filePath, dir) {
  let content = fs.readFileSync(filePath, 'utf8');

  const skipZones = getSkipZones(content);
  const changes = [];

  for (const feature of FEATURES) {
    // If the file already links to this feature URL, skip entirely
    if (content.includes(feature.url)) continue;

    const hit = findLinkableMention(content, feature, skipZones);
    if (!hit) continue;

    // Use feature.url so the inserted link is exactly what the check above looks for.
    const replacement = `[${hit.text}](${feature.url})`;
    const matchEnd = hit.index + hit.text.length;
    content = content.slice(0, hit.index) + replacement + content.slice(matchEnd);

    // Shift every zone located at or after the end of the match. `>=` matters:
    // a zone that starts immediately after the match (e.g. "funnels`code`" or
    // "<b>funnels</b>") has moved too and must keep protecting its text.
    const lenDiff = replacement.length - hit.text.length;
    for (const z of skipZones) {
      if (z.start >= matchEnd) {
        z.start += lenDiff;
        z.end += lenDiff;
      }
    }
    // The new link is itself off-limits for the remaining features.
    skipZones.push({ start: hit.index, end: hit.index + replacement.length });

    changes.push({ feature: feature.slug, matched: hit.text });
  }

  if (changes.length === 0) return null;

  // Add / update the `updated` frontmatter field for articles & guides
  if (DIRS_WITH_UPDATED.includes(dir)) {
    content = withUpdatedDate(content);
  }

  fs.writeFileSync(filePath, content, 'utf8');
  return changes;
}
|
||||
|
||||
// ── Walk directories ─────────────────────────────────────────────────
|
||||
|
||||
/**
 * Recursively list every `.mdx` file below `dir`.
 *
 * Entries are visited in the order `fs.readdirSync` returns them; a
 * subdirectory's files appear at the position of that subdirectory.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths of the `.mdx` files found, each prefixed with `dir`.
 */
function walk(dir) {
  return fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
    const entryPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      return walk(entryPath);
    }
    return entry.name.endsWith('.mdx') ? [entryPath] : [];
  });
}
|
||||
|
||||
// ── Main ─────────────────────────────────────────────────────────────
|
||||
|
||||
// Process every configured content directory that exists, keeping one record
// per file that was actually modified.
const results = [];

DIRS.forEach((dir) => {
  const dirPath = path.join(CONTENT_DIR, dir);
  if (!fs.existsSync(dirPath)) return;

  walk(dirPath).forEach((file) => {
    const changes = processFile(file, dir);
    if (changes) {
      results.push({ file: path.relative(ROOT, file), changes });
    }
  });
});

// Summary: file and link totals first, then the individual links per file.
let totalLinks = 0;
for (const { changes } of results) {
  totalLinks += changes.length;
}

console.log('=== Internal Linking Report ===\n');
console.log(`Total files modified: ${results.length}`);
console.log(`Total links added: ${totalLinks}\n`);

results.forEach(({ file, changes }) => {
  console.log(` ${file}`);
  changes.forEach(({ matched, feature }) => {
    console.log(` -> "${matched}" => /features/${feature}`);
  });
});

console.log('\nDone.');
|
||||
Reference in New Issue
Block a user