public: feature pages

2026-02-07 16:42:02 +00:00
parent ed8b5c667e
commit 6ce9b5dd1b
127 changed files with 3140 additions and 81 deletions
--- a/scripts/add-internal-links.mjs
+++ b/scripts/add-internal-links.mjs
@@ -0,0 +1,258 @@
+#!/usr/bin/env node
+
+/**
+ * Adds internal links to feature pages across MDX content files.
+ *
+ * Rules:
+ * - Only links the FIRST mention of each feature per file
+ * - Skips code blocks, inline code, existing links, headings, JSX tags, imports,
+ *   the frontmatter block, markdown table rows, and blockquote lines that contain links
+ * - Skips if the feature URL is already linked somewhere on the page
+ * - Skips "data retention" (not about the retention feature)
+ * - Adds `updated: YYYY-MM-DD` to frontmatter of modified articles & guides
+ */
+
+import fs from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+
+// ES modules do not define __dirname; derive it from this module's URL.
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+// One level above the directory that contains this script.
+const ROOT = path.resolve(__dirname, '..');
+// Folder whose sub-directories (see DIRS) are scanned for .mdx files.
+const CONTENT_DIR = path.join(ROOT, 'apps/public/content');
+// Value written to the `updated` frontmatter field. NOTE: a fixed literal, not the current date.
+const TODAY = '2026-02-07';
+
+// ── Feature definitions ─────────────────────────────────────────────
+// Patterns are tried in order; first match wins for each feature.
+// Longer / more specific patterns come first to avoid partial matches.
+// Each entry describes one feature page:
+//   slug          - identifier reported for the feature in the change log
+//   url           - site path of the feature page; a file that already contains
+//                   this string is not linked again for that feature
+//   patterns      - phrases to look for (matched case-insensitively on word boundaries)
+//   excludeBefore - optional; a match is rejected when the lower-cased text right
+//                   before it ends with one of these words followed by a space
+//   excludeAfter  - optional; a match is rejected when the lower-cased text right
+//                   after it starts with a space followed by one of these phrases
+// Entries are processed in array order, so "funnels" is handled before "conversion".
+const FEATURES = [
+  {
+    slug: 'event-tracking',
+    url: '/features/event-tracking',
+    patterns: ['event tracking'],
+  },
+  {
+    slug: 'session-tracking',
+    url: '/features/session-tracking',
+    patterns: ['session tracking'],
+  },
+  {
+    slug: 'revenue-tracking',
+    url: '/features/revenue-tracking',
+    patterns: ['revenue tracking'],
+  },
+  {
+    slug: 'data-visualization',
+    url: '/features/data-visualization',
+    patterns: ['data visualization'],
+  },
+  {
+    slug: 'identify-users',
+    url: '/features/identify-users',
+    patterns: ['identify users', 'user identification'],
+  },
+  {
+    slug: 'web-analytics',
+    url: '/features/web-analytics',
+    patterns: ['web analytics'],
+  },
+  {
+    slug: 'funnels',
+    url: '/features/funnels',
+    // "conversion funnel(s)" links to funnels, not conversion
+    patterns: ['conversion funnels', 'conversion funnel', 'funnel analysis', 'funnels', 'funnel'],
+  },
+  {
+    slug: 'retention',
+    url: '/features/retention',
+    // "retention" alone is included but guarded by excludeBefore
+    patterns: ['retention analysis', 'user retention', 'retention rates', 'retention rate', 'retention'],
+    excludeBefore: ['data', 'unlimited'],  // skip "data retention", "unlimited retention"
+    excludeAfter: ['period', 'policy', 'limit', 'of data'],
+  },
+  {
+    slug: 'conversion',
+    url: '/features/conversion',
+    patterns: ['conversion tracking', 'conversion rates', 'conversion rate', 'conversion paths', 'conversions', 'conversion'],
+    excludeBefore: ['data'],
+  },
+];
+
+// Directories to scan (relative to CONTENT_DIR); ones that do not exist are skipped.
+const DIRS = ['articles', 'guides', 'docs', 'pages'];
+// Only files found under these dirs get the `updated` frontmatter field
+// added or refreshed when a link is inserted.
+const DIRS_WITH_UPDATED = ['articles', 'guides'];
+
+// ── Helpers ──────────────────────────────────────────────────────────
+
+/** Return an array of { start, end } ranges that should NOT be modified. */
+function getSkipZones(text) {
+  const zones = [];
+  let m;
+
+  // Fenced code blocks  ```…```
+  const codeBlock = /```[\s\S]*?```/g;
+  while ((m = codeBlock.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
+
+  // Inline code `…`
+  const inlineCode = /`[^`\n]+`/g;
+  while ((m = inlineCode.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
+
+  // Existing markdown links [text](url)
+  const mdLink = /\[[^\]]*\]\([^)]*\)/g;
+  while ((m = mdLink.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
+
+  // Headings  # … (entire line)
+  const heading = /^#{1,6}\s+.+$/gm;
+  while ((m = heading.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
+
+  // JSX / HTML tags (attributes may contain feature words)
+  const jsxTag = /<[^>]+>/g;
+  while ((m = jsxTag.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
+
+  // import statements
+  const imp = /^import\s+.+$/gm;
+  while ((m = imp.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
+
+  // Frontmatter block
+  const fm = /^---[\s\S]*?---/;
+  if ((m = fm.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
+
+  // Markdown table rows (| … |)
+  const tableRow = /^\|.+\|$/gm;
+  while ((m = tableRow.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
+
+  // > blockquote lines that contain links
+  const bqLink = /^>\s.*\[.*\]\(.*\).*$/gm;
+  while ((m = bqLink.exec(text))) zones.push({ start: m.index, end: m.index + m[0].length });
+
+  return zones;
+}
+
+function overlapsSkipZone(pos, len, zones) {
+  const end = pos + len;
+  return zones.some((z) => !(end <= z.start || pos >= z.end));
+}
+
+function escapeRegex(s) {
+  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+}
+
+// ── Core processing ──────────────────────────────────────────────────
+
+function processFile(filePath, dir) {
+  let content = fs.readFileSync(filePath, 'utf8');
+
+  const skipZones = getSkipZones(content);
+  const changes = [];
+
+  for (const feature of FEATURES) {
+    // If the file already links to this feature URL, skip entirely
+    if (content.includes(feature.url)) continue;
+
+    let linked = false;
+
+    for (const pattern of feature.patterns) {
+      if (linked) break;
+
+      const re = new RegExp(`\\b${escapeRegex(pattern)}\\b`, 'gi');
+      let m;
+
+      while ((m = re.exec(content))) {
+        // In a skip zone?
+        if (overlapsSkipZone(m.index, m[0].length, skipZones)) continue;
+
+        // Check excludeBefore / excludeAfter
+        if (feature.excludeBefore) {
+          const before = content.slice(Math.max(0, m.index - 20), m.index).toLowerCase();
+          if (feature.excludeBefore.some((w) => before.endsWith(w + ' '))) continue;
+        }
+        if (feature.excludeAfter) {
+          const after = content.slice(m.index + m[0].length, m.index + m[0].length + 20).toLowerCase();
+          if (feature.excludeAfter.some((w) => after.startsWith(' ' + w))) continue;
+        }
+
+        // Build replacement
+        const replacement = `[${m[0]}](/features/${feature.slug})`;
+        content =
+          content.slice(0, m.index) +
+          replacement +
+          content.slice(m.index + m[0].length);
+
+        // Add the new link as a skip zone and shift all subsequent zones
+        const lenDiff = replacement.length - m[0].length;
+        skipZones.push({ start: m.index, end: m.index + replacement.length });
+        for (const z of skipZones) {
+          if (z.start > m.index + m[0].length) {
+            z.start += lenDiff;
+            z.end += lenDiff;
+          }
+        }
+
+        changes.push({ feature: feature.slug, matched: m[0] });
+        linked = true;
+        break;
+      }
+    }
+  }
+
+  if (changes.length === 0) return null;
+
+  // Add / update the `updated` frontmatter field for articles & guides
+  if (DIRS_WITH_UPDATED.includes(dir)) {
+    const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
+    if (fmMatch) {
+      let fm = fmMatch[1];
+      if (/^updated:/m.test(fm)) {
+        fm = fm.replace(/^updated:\s*.+$/m, `updated: ${TODAY}`);
+      } else if (/^date:/m.test(fm)) {
+        fm = fm.replace(/^(date:\s*.+)$/m, `$1\nupdated: ${TODAY}`);
+      } else {
+        fm += `\nupdated: ${TODAY}`;
+      }
+      content = content.replace(fmMatch[0], `---\n${fm}\n---`);
+    }
+  }
+
+  fs.writeFileSync(filePath, content, 'utf8');
+  return changes;
+}
+
+// ── Walk directories ─────────────────────────────────────────────────
+
+function walk(dir) {
+  const entries = fs.readdirSync(dir, { withFileTypes: true });
+  const files = [];
+  for (const e of entries) {
+    const full = path.join(dir, e.name);
+    if (e.isDirectory()) files.push(...walk(full));
+    else if (e.name.endsWith('.mdx')) files.push(full);
+  }
+  return files;
+}
+
+// ── Main ─────────────────────────────────────────────────────────────
+
+const results = [];
+
+for (const dir of DIRS) {
+  const dirPath = path.join(CONTENT_DIR, dir);
+  if (!fs.existsSync(dirPath)) continue;
+
+  for (const file of walk(dirPath)) {
+    const changes = processFile(file, dir);
+    if (changes) {
+      results.push({ file: path.relative(ROOT, file), changes });
+    }
+  }
+}
+
+console.log('=== Internal Linking Report ===\n');
+console.log(`Total files modified: ${results.length}`);
+console.log(`Total links added: ${results.reduce((s, r) => s + r.changes.length, 0)}\n`);
+
+for (const r of results) {
+  console.log(`  ${r.file}`);
+  for (const c of r.changes) {
+    console.log(`    -> "${c.matched}" => /features/${c.feature}`);
+  }
+}
+console.log('\nDone.');