{"benchmark":{"formatVersion":1,"type":"dev.nipmod.source-quality-benchmark.v1","status":"public_benchmark_snapshot","generatedFrom":"pnpm search:benchmark","generatedAt":"2026-05-27T19:50:00.000Z","summary":{"blockedRecommendedCount":0,"fail":0,"meanReciprocalRank":1,"missingExpectedIntentReasonCount":0,"pass":28,"recallAt1":1,"recallAt3":1,"total":28},"scope":{"countingNote":"Source coverage counts benchmark cases where the source was requested; multi-source cases count toward each requested source.","question":"Can Nipmod choose a useful package, model, repo, dataset or MCP server before an agent moves toward external code execution?","scenarioNote":"Scenario groups are overlapping by design; one benchmark case can exercise more than one risk class.","unit":"search result and pre-install source selection"},"sourceCoverage":[{"pass":16,"requestedCases":16,"source":"npm"},{"pass":12,"requestedCases":12,"source":"pypi"},{"pass":1,"requestedCases":1,"source":"github"},{"pass":2,"requestedCases":2,"source":"huggingface-model"},{"pass":1,"requestedCases":1,"source":"huggingface-dataset"},{"pass":2,"requestedCases":2,"source":"mcp"}],"scenarioCoverage":[{"cases":8,"label":"baseline package, model, repo or MCP selection"},{"cases":2,"label":"partial or multi-source outage behavior"},{"cases":6,"label":"typo, namespace, dependency confusion or source impersonation"},{"cases":5,"label":"install, lifecycle, wallet, dataset script or credential-scope risk"},{"cases":5,"label":"package metadata, README, long-description or model-card instruction risk"},{"cases":4,"label":"deprecation, publisher continuity or takeover timeline risk"}],"gates":["expected package appears within max rank","recommended candidate is not blocked","unsafe decoys are not recommended","query intent reason is present where required","partial source outage is surfaced without hiding returned records","namespace-confusion decoys do not beat expected scoped SDKs","obfuscated package metadata instructions are treated as untrusted data","publisher-continuity warnings can force npm candidates into review","maintainer-compromise after dormancy decoys do not beat canonical packages","package takeover timelines force exact-name PyPI candidates into review","runtime/source intent prevents cross-registry name impersonation from winning","model-card instruction decoys are blocked as untrusted metadata","multi-source outages remain partial when at least one safe source returns records","PyPI long-description instruction decoys are blocked as untrusted metadata","GitHub README instruction decoys are blocked as untrusted metadata","source-repository mismatch decoys do not beat canonical packages","TrapDoor-style crypto developer environment decoys are blocked before recommendation","Hugging Face dataset script decoys are blocked before recommendation","MCP credential-scope decoys without source repositories are blocked before recommendation"],"notClaimed":["malware-free guarantee","full registry crawl","private source visibility","model behavior evaluation","legal approval for license or dataset use"]},"formatVersion":1,"profiles":[{"source":"npm","assessmentVersion":"source-quality-v1","bestFor":["JavaScript and TypeScript package selection","install-plan review","lifecycle script risk checks"],"coverage":"strong","depthScore":98,"inspectDepth":"latest manifest, tarball integrity, registry signatures, lifecycle scripts, packument version intelligence, OSV advisory context, dependency count and download signal","limitations":["npm search ranking is upstream-provided and can still surface weak packages","monthly download data is a usage signal, not proof of safety","Nipmod does not execute or unpack tarballs in the hosted API"],"notClaimed":["package authorship","malware-free guarantee","workspace execution approval"],"searchDepth":"registry-ranked search with validated task hints for common agent requests","strengths":["direct registry API","integrity and signature metadata when returned","OSV advisory lookup","install-time lifecycle script warnings"],"targetDepthScore":98},{"source":"pypi","assessmentVersion":"source-quality-v1","bestFor":["Python package exact inspect","wheel/source release risk review","known PyPI vulnerability context"],"coverage":"strong","depthScore":96,"inspectDepth":"project JSON, latest release files, file hashes, yanked flags, OSV advisory context, release velocity, Simple API metadata and provenance links","limitations":["PyPI has no official JSON search API, so broad natural-language discovery uses normalized candidates and curated task hints","source-only packages can execute build backend code during local install","signature/provenance metadata is only as deep as the upstream APIs return"],"notClaimed":["full index crawl","malware-free guarantee","private package visibility"],"searchDepth":"normalized name candidates, validated task hints, exact-name fallback and source-specific ranking","strengths":["release-file digest checks","yanked and vulnerability signals","OSV advisory lookup","Simple API provenance/core metadata when returned"],"targetDepthScore":96},{"source":"github","assessmentVersion":"source-quality-v1","bestFor":["source repository discovery","repo activity context","agent review before cloning code"],"coverage":"strong","depthScore":95,"inspectDepth":"repository metadata plus selected manifest, security, workflow, Dockerfile, release asset, commit freshness and lockfile probes on the default branch","limitations":["GitHub repository search is not package-registry resolution","selected manifest probes do not replace a full repository audit","stars, forks and activity are context signals, not safety proof"],"notClaimed":["verified release provenance","full code scan","dependency vulnerability audit"],"searchDepth":"GitHub repository search sorted by stars with archived repositories filtered out and top-result manifest/README enrichment","strengths":["owner/repo identity","license and activity metadata","selected package/security/workflow file checks","workflow and Dockerfile risk pattern probes","README metadata instruction warnings"],"targetDepthScore":95},{"source":"huggingface-model","assessmentVersion":"source-quality-v1","bestFor":["model discovery","model card and file-shape context","remote-code and weight-format warning"],"coverage":"strong","depthScore":95,"inspectDepth":"model API metadata, cardData, tags, siblings, downloads, likes, gated/private flags, commit SHA, file-shape counts, eval labels and remote-code indicators","limitations":["model files are not downloaded or executed by the hosted API","model safety, bias and license suitability still require separate review","private or gated model access depends on the caller's own Hugging Face permissions"],"notClaimed":["model behavior evaluation","weight integrity beyond returned metadata","license legal advice"],"searchDepth":"Hugging Face hub search sorted by downloads","strengths":["safetensors versus pickle/binary warning","trust_remote_code warning","gated/private metadata","model-index and file-shape evidence"],"targetDepthScore":95},{"source":"huggingface-dataset","assessmentVersion":"source-quality-v1","bestFor":["dataset discovery","dataset card metadata","license and hub usage context"],"coverage":"strong","depthScore":93,"inspectDepth":"dataset API metadata, dataset_info, features, splits, tags, siblings, data file shape, compressed archive/script warnings, downloads, likes, gated/private flags and commit SHA when returned","limitations":["dataset contents are not downloaded, sampled or scanned by the hosted API","dataset quality, bias and legal suitability require separate review","private or gated dataset access depends on the caller's own Hugging Face permissions"],"notClaimed":["dataset content audit","training suitability approval","license legal advice"],"searchDepth":"Hugging Face dataset search sorted by downloads","strengths":["source-owned hub metadata","license tag and card/file presence","dataset_info and gated/private metadata","dataset script and archive warnings"],"targetDepthScore":93},{"source":"mcp","assessmentVersion":"source-quality-v1","bestFor":["MCP server discovery","remote endpoint context","credential-scope review before enabling tools"],"coverage":"moderate","depthScore":90,"inspectDepth":"MCP registry server metadata, schema URL, remote endpoint security, repository link, status, package references and credential-scope summary when returned","limitations":["MCP registry availability and schema stability are still early","tool behavior is not executed or sandboxed by the hosted API","registry fallback is a pinned public snapshot, not a live full registry crawl"],"notClaimed":["tool execution safety","server operator verification","credential policy approval"],"searchDepth":"MCP registry server search with pinned fallback for known public records","strengths":["remote endpoint visibility","environment requirement warnings","credential-scope summary","source repository link when returned"],"targetDepthScore":90}],"type":"dev.nipmod.source-quality-report.v1"}