diff --git a/.claude/commands/dedupe.md b/.claude/commands/dedupe.md
new file mode 100644
index 0000000000..3f48f3a02a
--- /dev/null
+++ b/.claude/commands/dedupe.md
@@ -0,0 +1,43 @@
+---
+allowed-tools: Bash(gh issue view:*), Bash(gh search:*), Bash(gh issue list:*), Bash(gh api:*), Bash(gh issue comment:*)
+description: Find duplicate GitHub issues
+---
+
+# Issue deduplication command
+
+Find up to 3 likely duplicate issues for a given GitHub issue.
+
+To do this, follow these steps precisely:
+
+1. Use an agent to check if the GitHub issue (a) is closed, (b) does not need to be deduped (e.g. because it is broad product feedback without a specific solution, or positive feedback), or (c) already has a duplicate detection comment (check for the exact HTML marker `<!-- claude-dedupe-issues -->` in the issue comments - ignore other bot comments). If so, do not proceed.
+2. Use an agent to view a GitHub issue, and ask the agent to return a summary of the issue
+3. Then, launch 5 parallel agents to search GitHub for duplicates of this issue, using diverse keywords and search approaches, using the summary from Step 2. **IMPORTANT**: Always scope searches with `repo:owner/repo` to constrain results to the current repository only.
+4. Next, feed the results from Steps 2 and 3 into another agent, so that it can filter out false positives — results that are likely not actually duplicates of the original issue. If there are no duplicates remaining, do not proceed.
+5. Finally, comment back on the issue with a list of up to three duplicate issues (or zero, if there are no likely duplicates)
+
+Notes (be sure to tell this to your agents, too):
+
+- Use `gh` to interact with GitHub, rather than web fetch
+- Do not use other tools, beyond `gh` (e.g. don't use other MCP servers, file edit, etc.)
+- Make a todo list first
+- Always scope searches with `repo:owner/repo` to prevent cross-repo false positives
+- For your comment, use the following format precisely (assuming for this example that you found 3 suspected duplicates):
+
+---
+
+Found 3 possible duplicate issues:
+
+1. <link to duplicate issue>
+2. <link to duplicate issue>
+3. <link to duplicate issue>
+
+This issue will be automatically closed as a duplicate in 3 days.
+
+- If your issue is a duplicate, please close it and 👍 the existing issue instead
+- To prevent auto-closure, add a comment or 👎 this comment
+
+🤖 Generated with [Claude Code](https://claude.ai/code)
+
+<!-- claude-dedupe-issues -->
+
+---
diff --git a/.github/workflows/auto-close-duplicates.yml b/.github/workflows/auto-close-duplicates.yml
new file mode 100644
index 0000000000..886976bf6a
--- /dev/null
+++ b/.github/workflows/auto-close-duplicates.yml
@@ -0,0 +1,29 @@
+name: Auto-close duplicate issues
+on:
+  schedule:
+    - cron: "0 9 * * *"
+  workflow_dispatch:
+
+jobs:
+  auto-close-duplicates:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    concurrency:
+      group: auto-close-duplicates-${{ github.repository }}
+      cancel-in-progress: true
+    permissions:
+      contents: read
+      issues: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Bun
+        uses: ./.github/actions/setup-bun
+
+      - name: Auto-close duplicate issues
+        run: bun run scripts/auto-close-duplicates.ts
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
diff --git a/.github/workflows/claude-dedupe-issues.yml b/.github/workflows/claude-dedupe-issues.yml
new file mode 100644
index 0000000000..3677f61352
--- /dev/null
+++ b/.github/workflows/claude-dedupe-issues.yml
@@ -0,0 +1,34 @@
+name: Claude Issue Dedupe
+on:
+  issues:
+    types: [opened]
+  workflow_dispatch:
+    inputs:
+      issue_number:
+        description: 'Issue number to process for duplicate detection'
+        required: true
+        type: string
+
+jobs:
+  claude-dedupe-issues:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    concurrency:
+      group: 
claude-dedupe-issues-${{ github.event.issue.number || inputs.issue_number }} + cancel-in-progress: true + permissions: + contents: read + issues: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Run Claude Code slash command + uses: anthropics/claude-code-base-action@beta + with: + prompt: "/dedupe ${{ github.repository }}/issues/${{ github.event.issue.number || inputs.issue_number }}" + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + claude_args: "--model claude-sonnet-4-5-20250929" + claude_env: | + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/scripts/auto-close-duplicates.ts b/scripts/auto-close-duplicates.ts new file mode 100644 index 0000000000..d0c33575d7 --- /dev/null +++ b/scripts/auto-close-duplicates.ts @@ -0,0 +1,347 @@ +#!/usr/bin/env bun + +declare global { + var process: { + env: Record; + }; +} + +interface GitHubIssue { + number: number; + title: string; + user: { id: number }; + created_at: string; + pull_request?: object; +} + +interface GitHubComment { + id: number; + body: string; + created_at: string; + user: { type?: string; id: number }; +} + +interface GitHubReaction { + user: { id: number }; + content: string; +} + +async function sleep(ms: number): Promise { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +async function githubRequest( + endpoint: string, + token: string, + method: string = "GET", + body?: any, + retryCount: number = 0, +): Promise { + const maxRetries = 3; + + const response = await fetch(`https://api.github.com${endpoint}`, { + method, + headers: { + Authorization: `Bearer ${token}`, + Accept: "application/vnd.github+json", + "User-Agent": "auto-close-duplicates-script", + ...(body && { "Content-Type": "application/json" }), + }, + ...(body && { body: JSON.stringify(body) }), + }); + + // Check rate limit headers + const rateLimitRemaining = response.headers.get("x-ratelimit-remaining"); + const rateLimitReset = response.headers.get("x-ratelimit-reset"); + 
+ if (rateLimitRemaining && parseInt(rateLimitRemaining) < 100) { + console.warn(`[WARNING] GitHub API rate limit low: ${rateLimitRemaining} requests remaining`); + + if (parseInt(rateLimitRemaining) < 10) { + const resetTime = rateLimitReset ? parseInt(rateLimitReset) * 1000 : Date.now() + 60000; + const waitTime = Math.max(0, resetTime - Date.now()); + console.warn(`[WARNING] Rate limit critically low, waiting ${Math.ceil(waitTime / 1000)}s until reset`); + await sleep(waitTime + 1000); // Add 1s buffer + } + } + + // Handle rate limit errors with retry + if (response.status === 429 || response.status === 403) { + if (retryCount >= maxRetries) { + throw new Error(`GitHub API rate limit exceeded after ${maxRetries} retries`); + } + + const retryAfter = response.headers.get("retry-after"); + const waitTime = retryAfter ? parseInt(retryAfter) * 1000 : Math.min(1000 * Math.pow(2, retryCount), 32000); + + console.warn( + `[WARNING] Rate limited (${response.status}), retry ${retryCount + 1}/${maxRetries} after ${waitTime}ms`, + ); + await sleep(waitTime); + + return githubRequest(endpoint, token, method, body, retryCount + 1); + } + + if (!response.ok) { + throw new Error(`GitHub API request failed: ${response.status} ${response.statusText}`); + } + + return response.json(); +} + +async function fetchAllComments( + owner: string, + repo: string, + issueNumber: number, + token: string, +): Promise { + const allComments: GitHubComment[] = []; + let page = 1; + const perPage = 100; + + while (true) { + const comments: GitHubComment[] = await githubRequest( + `/repos/${owner}/${repo}/issues/${issueNumber}/comments?per_page=${perPage}&page=${page}`, + token, + ); + + if (comments.length === 0) break; + + allComments.push(...comments); + page++; + + // Safety limit + if (page > 20) break; + } + + return allComments; +} + +async function fetchAllReactions( + owner: string, + repo: string, + commentId: number, + token: string, + authorId?: number, +): Promise { + const 
allReactions: GitHubReaction[] = []; + let page = 1; + const perPage = 100; + + while (true) { + const reactions: GitHubReaction[] = await githubRequest( + `/repos/${owner}/${repo}/issues/comments/${commentId}/reactions?per_page=${perPage}&page=${page}`, + token, + ); + + if (reactions.length === 0) break; + + allReactions.push(...reactions); + + // Early exit if we're looking for a specific author and found their -1 reaction + if (authorId && reactions.some(r => r.user.id === authorId && r.content === "-1")) { + console.log(`[DEBUG] Found author thumbs down reaction, short-circuiting pagination`); + break; + } + + page++; + + // Safety limit + if (page > 20) break; + } + + return allReactions; +} + +function escapeRegExp(str: string): string { + return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function extractDuplicateIssueNumber(commentBody: string, owner: string, repo: string): number | null { + // Escape owner and repo to prevent ReDoS attacks + const escapedOwner = escapeRegExp(owner); + const escapedRepo = escapeRegExp(repo); + + // Try to match same-repo GitHub issue URL format first: https://github.com/owner/repo/issues/123 + const repoUrlPattern = new RegExp(`github\\.com/${escapedOwner}/${escapedRepo}/issues/(\\d+)`); + let match = commentBody.match(repoUrlPattern); + if (match) { + return parseInt(match[1], 10); + } + + // Fallback to #123 format (assumes same repo) + match = commentBody.match(/#(\d+)/); + if (match) { + return parseInt(match[1], 10); + } + + return null; +} + +async function closeIssueAsDuplicate( + owner: string, + repo: string, + issueNumber: number, + duplicateOfNumber: number, + token: string, +): Promise { + // Close the issue as duplicate and add the duplicate label + await githubRequest(`/repos/${owner}/${repo}/issues/${issueNumber}`, token, "PATCH", { + state: "closed", + state_reason: "duplicate", + labels: ["duplicate"], + }); + + await githubRequest(`/repos/${owner}/${repo}/issues/${issueNumber}/comments`, token, 
"POST", { + body: `This issue has been automatically closed as a duplicate of #${duplicateOfNumber}. + +If this is incorrect, please re-open this issue or create a new one. + +🤖 Generated with [Claude Code](https://claude.ai/code)`, + }); +} + +async function autoCloseDuplicates(): Promise { + console.log("[DEBUG] Starting auto-close duplicates script"); + + const token = process.env.GITHUB_TOKEN; + if (!token) { + throw new Error("GITHUB_TOKEN environment variable is required"); + } + console.log("[DEBUG] GitHub token found"); + + // Parse GITHUB_REPOSITORY (format: "owner/repo") + const repository = process.env.GITHUB_REPOSITORY || "oven-sh/bun"; + const [owner, repo] = repository.split("/"); + if (!owner || !repo) { + throw new Error(`Invalid GITHUB_REPOSITORY format: ${repository}`); + } + console.log(`[DEBUG] Repository: ${owner}/${repo}`); + + const threeDaysAgo = new Date(); + threeDaysAgo.setDate(threeDaysAgo.getDate() - 3); + console.log(`[DEBUG] Checking for duplicate comments older than: ${threeDaysAgo.toISOString()}`); + + console.log("[DEBUG] Fetching open issues created more than 3 days ago..."); + const allIssues: GitHubIssue[] = []; + let page = 1; + const perPage = 100; + + while (true) { + const pageIssues: GitHubIssue[] = await githubRequest( + `/repos/${owner}/${repo}/issues?state=open&per_page=${perPage}&page=${page}`, + token, + ); + + if (pageIssues.length === 0) break; + + // Filter for issues created more than 3 days ago and exclude pull requests + const oldEnoughIssues = pageIssues.filter( + issue => !issue.pull_request && new Date(issue.created_at) <= threeDaysAgo, + ); + + allIssues.push(...oldEnoughIssues); + page++; + + // Safety limit to avoid infinite loops + if (page > 20) break; + } + + const issues = allIssues; + console.log(`[DEBUG] Found ${issues.length} open issues`); + + let processedCount = 0; + let candidateCount = 0; + + for (const issue of issues) { + processedCount++; + console.log(`[DEBUG] Processing issue 
#${issue.number} (${processedCount}/${issues.length}): ${issue.title}`); + + console.log(`[DEBUG] Fetching comments for issue #${issue.number}...`); + const comments = await fetchAllComments(owner, repo, issue.number, token); + console.log(`[DEBUG] Issue #${issue.number} has ${comments.length} comments`); + + const dupeComments = comments.filter( + comment => + comment.body.includes("Found") && + comment.body.includes("possible duplicate") && + comment.user?.type === "Bot" && + comment.body.includes(""), + ); + console.log(`[DEBUG] Issue #${issue.number} has ${dupeComments.length} duplicate detection comments`); + + if (dupeComments.length === 0) { + console.log(`[DEBUG] Issue #${issue.number} - no duplicate comments found, skipping`); + continue; + } + + const lastDupeComment = dupeComments[dupeComments.length - 1]; + const dupeCommentDate = new Date(lastDupeComment.created_at); + console.log( + `[DEBUG] Issue #${issue.number} - most recent duplicate comment from: ${dupeCommentDate.toISOString()}`, + ); + + if (dupeCommentDate > threeDaysAgo) { + console.log(`[DEBUG] Issue #${issue.number} - duplicate comment is too recent, skipping`); + continue; + } + console.log( + `[DEBUG] Issue #${issue.number} - duplicate comment is old enough (${Math.floor( + (Date.now() - dupeCommentDate.getTime()) / (1000 * 60 * 60 * 24), + )} days)`, + ); + + // Filter for human comments (not bot comments) after the duplicate comment + const commentsAfterDupe = comments.filter( + comment => new Date(comment.created_at) > dupeCommentDate && comment.user?.type !== "Bot", + ); + console.log( + `[DEBUG] Issue #${issue.number} - ${commentsAfterDupe.length} human comments after duplicate detection`, + ); + + if (commentsAfterDupe.length > 0) { + console.log(`[DEBUG] Issue #${issue.number} - has human activity after duplicate comment, skipping`); + continue; + } + + console.log(`[DEBUG] Issue #${issue.number} - checking reactions on duplicate comment...`); + const reactions = await 
fetchAllReactions(owner, repo, lastDupeComment.id, token, issue.user.id); + console.log(`[DEBUG] Issue #${issue.number} - duplicate comment has ${reactions.length} reactions`); + + const authorThumbsDown = reactions.some( + reaction => reaction.user.id === issue.user.id && reaction.content === "-1", + ); + console.log(`[DEBUG] Issue #${issue.number} - author thumbs down reaction: ${authorThumbsDown}`); + + if (authorThumbsDown) { + console.log(`[DEBUG] Issue #${issue.number} - author disagreed with duplicate detection, skipping`); + continue; + } + + const duplicateIssueNumber = extractDuplicateIssueNumber(lastDupeComment.body, owner, repo); + if (!duplicateIssueNumber) { + console.log(`[DEBUG] Issue #${issue.number} - could not extract duplicate issue number from comment, skipping`); + continue; + } + + candidateCount++; + const issueUrl = `https://github.com/${owner}/${repo}/issues/${issue.number}`; + + try { + console.log(`[INFO] Auto-closing issue #${issue.number} as duplicate of #${duplicateIssueNumber}: ${issueUrl}`); + await closeIssueAsDuplicate(owner, repo, issue.number, duplicateIssueNumber, token); + console.log(`[SUCCESS] Successfully closed issue #${issue.number} as duplicate of #${duplicateIssueNumber}`); + } catch (error) { + console.error(`[ERROR] Failed to close issue #${issue.number} as duplicate: ${error}`); + } + } + + console.log( + `[DEBUG] Script completed. Processed ${processedCount} issues, found ${candidateCount} candidates for auto-close`, + ); +} + +autoCloseDuplicates().catch(console.error); + +// Make it a module +export {};