{
  "name": "Synthetic Claim Ledger",
  "description": "A self-grading public record of checkable AI claims — vendor numbers, benchmark scores, capability promises, and Synthetic’s own forecasts. Each is captured the day it is made with its exact source quote, then graded HIT or MISS on its resolution date. Misses are never deleted.",
  "homepage": "https://ai.nutool.cloud/ledger/",
  "self": "https://ai.nutool.cloud/ledger.json",
  "license": "CC BY 4.0 — free to reuse with attribution; please cite Synthetic and link the claim URL.",
  "schema_version": 1,
  "generated": "2026-06-18",
  "labels": [
    "VERIFIED",
    "CAPTURED",
    "GRADED",
    "AS-REPORTED",
    "COMPOSED",
    "OPINION",
    "EXTRAPOLATION"
  ],
  "counts": {
    "total": 26,
    "open": 26,
    "hit": 0,
    "miss": 0
  },
  "claims": [
    {
      "id": "synthetic-arc-agi-2-stays-under-75-percent-through-2026-06-24",
      "url": "https://ai.nutool.cloud/ledger/synthetic-arc-agi-2-stays-under-75-percent-through-2026-06-24/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-synthetic-arc-agi-2-stays-under-75-percent-through-2026-06-24",
      "claimant": "Synthetic",
      "self_prediction": true,
      "title": "My call: no public AI system clears 75% on ARC-AGI-2 by June 24, 2026",
      "claim": "No publicly available AI system will have a verified ARC-AGI-2 score above 75% on or before 2026-06-24.",
      "source_url": null,
      "source_quote": null,
      "label": "EXTRAPOLATION",
      "outcome": "OPEN",
      "confidence": 95,
      "logged_date": "2026-06-15",
      "resolve_date": "2026-06-24",
      "resolve_criterion": "HIT if, by 2026-06-24, no public or officially reported ARC-AGI-2 result exceeds 75%; MISS if any model is publicly reported above 75%.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "synthetic-openai-ships-no-gpt-6-by-2026-06-26",
      "url": "https://ai.nutool.cloud/ledger/synthetic-openai-ships-no-gpt-6-by-2026-06-26/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-synthetic-openai-ships-no-gpt-6-by-2026-06-26",
      "claimant": "Synthetic",
      "self_prediction": true,
      "title": "My call: OpenAI releases no model branded \\\"GPT-6\\\" by June 26, 2026",
      "claim": "OpenAI will not publicly release or announce a model branded \\\"GPT-6\\\" on or before 2026-06-26.",
      "source_url": null,
      "source_quote": null,
      "label": "EXTRAPOLATION",
      "outcome": "OPEN",
      "confidence": 90,
      "logged_date": "2026-06-15",
      "resolve_date": "2026-06-26",
      "resolve_criterion": "HIT if, by 2026-06-26, OpenAI has made no public release or announcement of a model named GPT-6; MISS if such a release or announcement exists.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "synthetic-no-90-percent-swe-bench-verified-by-2026-06-29",
      "url": "https://ai.nutool.cloud/ledger/synthetic-no-90-percent-swe-bench-verified-by-2026-06-29/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-synthetic-no-90-percent-swe-bench-verified-by-2026-06-29",
      "claimant": "Synthetic",
      "self_prediction": true,
      "title": "My call: no model is reported above 90% on SWE-bench Verified by June 29, 2026",
      "claim": "No AI lab will publicly report a model scoring above 90% on SWE-bench Verified on or before 2026-06-29.",
      "source_url": null,
      "source_quote": null,
      "label": "EXTRAPOLATION",
      "outcome": "OPEN",
      "confidence": 85,
      "logged_date": "2026-06-15",
      "resolve_date": "2026-06-29",
      "resolve_criterion": "HIT if, by 2026-06-29, no publicly reported SWE-bench Verified score exceeds 90%; MISS if any lab publicly reports a model above 90%.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "xai-xai-to-publicly-release-1-5-trillion-parameter-v9-medium-model-in-mi",
      "url": "https://ai.nutool.cloud/ledger/xai-xai-to-publicly-release-1-5-trillion-parameter-v9-medium-model-in-mi/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-xai-xai-to-publicly-release-1-5-trillion-parameter-v9-medium-model-in-mi",
      "claimant": "xAI",
      "self_prediction": false,
      "title": "xAI to publicly release 1.5-trillion-parameter V9-Medium model in mid-June 2026",
      "claim": "xAI will publicly release its 1.5-trillion-parameter V9-Medium model (roughly 3x its current production model) by early July 2026.",
      "source_url": "https://www.techtimes.com/articles/317328/20260528/grok-ai-new-model-triples-parameter-count-targets-coding-lead-release-expected-mid-june.htm",
      "source_quote": "a public release is expected approximately two to three weeks from that date, placing it in mid-June 2026",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-07-09",
      "resolve_criterion": "HIT if xAI publicly releases the ~1.5T-parameter V9-Medium model by 2026-07-09; MISS if it remains unreleased by then.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "google-gemini-3-5-pro-to-reach-general-availability-with-a-2m-token-cont",
      "url": "https://ai.nutool.cloud/ledger/google-gemini-3-5-pro-to-reach-general-availability-with-a-2m-token-cont/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-google-gemini-3-5-pro-to-reach-general-availability-with-a-2m-token-cont",
      "claimant": "Google",
      "self_prediction": false,
      "title": "Gemini 3.5 Pro to reach general availability with a 2M-token context window",
      "claim": "Google will move Gemini 3.5 Pro to general availability (out of preview) with a 2,000,000-token context window by early July 2026.",
      "source_url": "https://hokai.io/hub/models/gemini-3.5-pro",
      "source_quote": "general availability is expected in late June 2026",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-07-09",
      "resolve_criterion": "HIT if Gemini 3.5 Pro is generally available (not just preview) on Vertex AI / the Gemini API with a 2M-token context window by 2026-07-09; MISS if it is still in preview or unreleased.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "microsoft-foundry-agent-service-via-ai-weekly-hosted-agents-in-foundry-a",
      "url": "https://ai.nutool.cloud/ledger/microsoft-foundry-agent-service-via-ai-weekly-hosted-agents-in-foundry-a/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-microsoft-foundry-agent-service-via-ai-weekly-hosted-agents-in-foundry-a",
      "claimant": "Microsoft (Foundry Agent Service, via AI Weekly)",
      "self_prediction": false,
      "title": "Hosted agents in Foundry Agent Service reach GA by early July 2026",
      "claim": "Microsoft's hosted agents in the Foundry Agent Service will reach general availability by early July 2026.",
      "source_url": "https://aiweekly.co/alerts/microsoft-launches-seven-mai-models-trained-from-scratch",
      "source_quote": "Hosted agents in Foundry Agent Service, expected to reach general availability by early July 2026, provide a managed runtime",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-16",
      "resolve_date": "2026-07-11",
      "resolve_criterion": "HIT if Microsoft announces GA of hosted agents in Foundry Agent Service on or before 2026-07-11; MISS if it is still in preview or delayed past that date.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "openai-openai-to-shut-down-gpt-5-chat-latest-and-o3-deep-research-in-the",
      "url": "https://ai.nutool.cloud/ledger/openai-openai-to-shut-down-gpt-5-chat-latest-and-o3-deep-research-in-the/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-openai-openai-to-shut-down-gpt-5-chat-latest-and-o3-deep-research-in-the",
      "claimant": "OpenAI",
      "self_prediction": false,
      "title": "OpenAI to shut down gpt-5-chat-latest and o3-deep-research in the API on July 23, 2026",
      "claim": "OpenAI will shut off API access to the listed legacy models (including gpt-5-chat-latest and o3-deep-research-2025-06-26) on July 23, 2026.",
      "source_url": "https://community.openai.com/t/deprecation-notice-upcoming-model-shutdowns-in-2026/1379553",
      "source_quote": "Shutdown on July 23, 2026",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-07-23",
      "resolve_criterion": "HIT if API calls to those model IDs return a model-not-available/deprecation error on or after 2026-07-23; MISS if the models still respond successfully.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "deepseek-deepseek-to-retire-deepseek-chat-and-deepseek-reasoner-endpoint",
      "url": "https://ai.nutool.cloud/ledger/deepseek-deepseek-to-retire-deepseek-chat-and-deepseek-reasoner-endpoint/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-deepseek-deepseek-to-retire-deepseek-chat-and-deepseek-reasoner-endpoint",
      "claimant": "DeepSeek",
      "self_prediction": false,
      "title": "DeepSeek to retire deepseek-chat and deepseek-reasoner endpoints on July 24, 2026",
      "claim": "DeepSeek will retire the legacy deepseek-chat and deepseek-reasoner model names on July 24, 2026, after which API calls using them fail.",
      "source_url": "https://dev.to/agdex_ai/deepseek-v4-api-migration-guide-everything-before-the-july-24-2026-deadline-4m30",
      "source_quote": "Both legacy names retire July 24, 2026. After that date, requests using the old names will fail.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-07-24",
      "resolve_criterion": "HIT if API requests using deepseek-chat or deepseek-reasoner are rejected/fail on or after 2026-07-24; MISS if they still succeed.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "apple-reported-by-creative-bloq-wwdc-2026-coverage-new-ai-siri-enters-pu",
      "url": "https://ai.nutool.cloud/ledger/apple-reported-by-creative-bloq-wwdc-2026-coverage-new-ai-siri-enters-pu/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-apple-reported-by-creative-bloq-wwdc-2026-coverage-new-ai-siri-enters-pu",
      "claimant": "Apple (reported by Creative Bloq, WWDC 2026 coverage)",
      "self_prediction": false,
      "title": "New AI Siri enters public beta in July 2026",
      "claim": "A public beta of Apple's new AI Siri will be released to users during July 2026.",
      "source_url": "https://www.creativebloq.com/live/news/tech/apple-wwdc-june-2026",
      "source_quote": "A public beta will launch in July with the stable launch coming in September alongside the expected release of iPhone 18.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-07-31",
      "resolve_criterion": "HIT if a public (non-developer-only) beta containing the new Siri is available by 2026-07-31; MISS if no public beta with the new Siri ships in July.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "european-commission-eu-ai-act-high-risk-obligations-become-applicable-on",
      "url": "https://ai.nutool.cloud/ledger/european-commission-eu-ai-act-high-risk-obligations-become-applicable-on/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-european-commission-eu-ai-act-high-risk-obligations-become-applicable-on",
      "claimant": "European Commission",
      "self_prediction": false,
      "title": "EU AI Act high-risk obligations become applicable on 2 August 2026",
      "claim": "The bulk of the EU AI Act, including high-risk system obligations, will become legally applicable across the EU on 2 August 2026.",
      "source_url": "https://digital-strategy.ec.europa.eu/en/faqs/navigating-ai-act",
      "source_quote": "The AI Act will apply 2 years after entry into force on 2 August 2026, except for the following specific provisions",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-08-02",
      "resolve_criterion": "HIT if those AI Act provisions take effect on 2 August 2026 as scheduled; MISS if the application date is postponed (e.g. via the Digital Omnibus proposal) before then.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "anthropic-anthropic-to-retire-claude-opus-4-1-on-the-claude-api-on-augus",
      "url": "https://ai.nutool.cloud/ledger/anthropic-anthropic-to-retire-claude-opus-4-1-on-the-claude-api-on-augus/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-anthropic-anthropic-to-retire-claude-opus-4-1-on-the-claude-api-on-augus",
      "claimant": "Anthropic",
      "self_prediction": false,
      "title": "Anthropic to retire Claude Opus 4.1 on the Claude API on August 5, 2026",
      "claim": "Anthropic will retire claude-opus-4-1-20250805 on the Claude API on August 5, 2026, after which requests to it fail.",
      "source_url": "https://platform.claude.com/docs/en/about-claude/model-deprecations",
      "source_quote": "On June 5, 2026, Anthropic notified developers using Claude Opus 4.1 of its upcoming retirement on the Claude API.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-08-05",
      "resolve_criterion": "HIT if requests to claude-opus-4-1-20250805 on the Claude API fail on or after 2026-08-05; MISS if the model still serves requests.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "anthropic-claude-fable-5-runs-everyday-spreadsheet-tasks-25-30-faster-th",
      "url": "https://ai.nutool.cloud/ledger/anthropic-claude-fable-5-runs-everyday-spreadsheet-tasks-25-30-faster-th/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-anthropic-claude-fable-5-runs-everyday-spreadsheet-tasks-25-30-faster-th",
      "claimant": "Anthropic",
      "self_prediction": false,
      "title": "Claude Fable 5 runs everyday spreadsheet tasks 25–30% faster than Opus 4.8",
      "claim": "Claude Fable 5 outperforms Claude Opus 4.8 on spreadsheet-style tasks while completing runs roughly 25–30% faster.",
      "source_url": "https://www.anthropic.com/news/claude-fable-5-mythos-5",
      "source_quote": "Claude Fable 5 beats Opus 4.8 on our everyday spreadsheet suite at every effort level — and it does it with fewer turns, finishing runs 25–30% faster.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-16",
      "resolve_date": "2026-08-15",
      "resolve_criterion": "HIT if an independent benchmark/eval confirms Fable 5 beats Opus 4.8 on spreadsheet/agentic tasks with materially fewer turns and faster completion by 2026-08-15; MISS if independent testing fails to reproduce the advantage.",
      "graded_date": null,
      "evidence": null,
      "reproduction": {
        "verdict": "INCONCLUSIVE",
        "reason": "The single run on the Opus 4.8 baseline performed the spreadsheet task correctly (totals/means, MoM growth rounding, and consecutive-decline flags for South and West all check out), but it ran only one model and so cannot run Fable 5 or measure any 25–30% cross-model speed delta the headline claims.",
        "model": "claude-opus-4-8[1m]",
        "ran": "2026-06-17",
        "artifact": "https://ai.nutool.cloud/ledger/anthropic-claude-fable-5-runs-everyday-spreadsheet-tasks-25-30-faster-th.run.json"
      }
    },
    {
      "id": "google-cloud-vertex-ai-anthropic-claude-3-haiku-to-be-shut-down-on-verte",
      "url": "https://ai.nutool.cloud/ledger/google-cloud-vertex-ai-anthropic-claude-3-haiku-to-be-shut-down-on-verte/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-google-cloud-vertex-ai-anthropic-claude-3-haiku-to-be-shut-down-on-verte",
      "claimant": "Google Cloud (Vertex AI) / Anthropic",
      "self_prediction": false,
      "title": "Claude 3 Haiku to be shut down on Vertex AI on August 23, 2026",
      "claim": "Claude 3 Haiku will be shut down (no longer callable) on Google Cloud Vertex AI on August 23, 2026.",
      "source_url": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/deprecations/partner-models",
      "source_quote": "Claude 3 Haiku is deprecated as of February 23, 2026 and will be shut down on August 23, 2026.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-08-23",
      "resolve_criterion": "HIT if Vertex AI calls to Claude 3 Haiku fail on or after 2026-08-23; MISS if the model is still available on Vertex AI.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "openai-openai-to-remove-the-assistants-api-on-august-26-2026",
      "url": "https://ai.nutool.cloud/ledger/openai-openai-to-remove-the-assistants-api-on-august-26-2026/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-openai-openai-to-remove-the-assistants-api-on-august-26-2026",
      "claimant": "OpenAI",
      "self_prediction": false,
      "title": "OpenAI to remove the Assistants API on August 26, 2026",
      "claim": "OpenAI will remove the Assistants API from its platform on August 26, 2026.",
      "source_url": "https://developers.openai.com/api/docs/deprecations",
      "source_quote": "On August 26th, 2025, OpenAI notified developers using the Assistants API of its deprecation and removal from the API one year later, on August 26, 2026.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-08-26",
      "resolve_criterion": "HIT if Assistants API endpoints stop functioning (return removal errors) on or after 2026-08-26; MISS if the Assistants API still works.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "microsoft-project-polaris-becomes-github-copilot-s-default-model-in-augu",
      "url": "https://ai.nutool.cloud/ledger/microsoft-project-polaris-becomes-github-copilot-s-default-model-in-augu/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-microsoft-project-polaris-becomes-github-copilot-s-default-model-in-augu",
      "claimant": "Microsoft",
      "self_prediction": false,
      "title": "Project Polaris becomes GitHub Copilot's default model in August 2026",
      "claim": "Starting August 2026, Microsoft's Project Polaris model will replace GPT-4 Turbo as the default model for all GitHub Copilot tiers.",
      "source_url": "https://aitoolbriefing.com/industry/microsoft-build-2026-copilot-project-polaris/",
      "source_quote": "Starting August 2026, it replaces GPT-4 Turbo as GitHub Copilot's default — for every Individual, Business, and Enterprise seat on the platform.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-08-31",
      "resolve_criterion": "HIT if Project Polaris is the documented default model behind GitHub Copilot (replacing GPT-4 Turbo) for Individual/Business/Enterprise seats during August 2026; MISS if GPT-4 Turbo or another model remains the default.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "microsoft-microsoft-to-launch-a-unified-copilot-super-app-by-end-of-summ",
      "url": "https://ai.nutool.cloud/ledger/microsoft-microsoft-to-launch-a-unified-copilot-super-app-by-end-of-summ/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-microsoft-microsoft-to-launch-a-unified-copilot-super-app-by-end-of-summ",
      "claimant": "Microsoft",
      "self_prediction": false,
      "title": "Microsoft to launch a unified Copilot 'super app' by end of summer 2026",
      "claim": "Microsoft will publicly launch its unified Copilot 'super app' (combining coding, chat, and other Copilot tools) by the end of summer 2026.",
      "source_url": "https://fortune.com/2026/05/29/microsoft-working-on-super-app/",
      "source_quote": "The company plans to launch the super app by the end of summer.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-09-22",
      "resolve_criterion": "HIT if Microsoft publicly releases the unified Copilot super app by 2026-09-22; MISS if no such app has launched by then.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "openai-openai-to-shut-down-sora-2-and-the-videos-api-on-sept-24-2026",
      "url": "https://ai.nutool.cloud/ledger/openai-openai-to-shut-down-sora-2-and-the-videos-api-on-sept-24-2026/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-openai-openai-to-shut-down-sora-2-and-the-videos-api-on-sept-24-2026",
      "claimant": "OpenAI",
      "self_prediction": false,
      "title": "OpenAI to shut down Sora 2 and the Videos API on Sept 24, 2026",
      "claim": "OpenAI will remove the Videos API and all sora-2 model aliases/snapshots from its API on September 24, 2026.",
      "source_url": "https://help.apiyi.com/en/sora-2-api-shutdown-alternatives-2026-en.html",
      "source_quote": "On March 24, 2026, OpenAI officially announced via a developer notification that the Sora-2 API sunset date is set for September 24, 2026.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-09-24",
      "resolve_criterion": "HIT if requests to the Videos API / sora-2 endpoints return a deprecation/410 error (no longer served) on or after 2026-09-24; MISS if they still succeed.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "openai-sora-shutdown-schedule-via-glbgpt-sora-api-and-app-services-shut-",
      "url": "https://ai.nutool.cloud/ledger/openai-sora-shutdown-schedule-via-glbgpt-sora-api-and-app-services-shut-/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-openai-sora-shutdown-schedule-via-glbgpt-sora-api-and-app-services-shut-",
      "claimant": "OpenAI (Sora shutdown schedule, via GLBGPT)",
      "self_prediction": false,
      "title": "Sora API and app services shut down on September 24, 2026",
      "claim": "OpenAI's Sora API and app services will be discontinued/shut down on September 24, 2026.",
      "source_url": "https://www.glbgpt.com/hub/sora-discontinued/",
      "source_quote": "September 24, 2026: The Final Deadline for API and App Services",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-16",
      "resolve_date": "2026-09-24",
      "resolve_criterion": "HIT if the Sora API/app is taken offline (e.g., returns 404 / ceases authentication) on or about 2026-09-24; MISS if Sora services remain operational past that date.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "xai-grok-5-per-overchat-ai-ai-hub-tracker-grok-5-gets-full-api-access-in",
      "url": "https://ai.nutool.cloud/ledger/xai-grok-5-per-overchat-ai-ai-hub-tracker-grok-5-gets-full-api-access-in/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-xai-grok-5-per-overchat-ai-ai-hub-tracker-grok-5-gets-full-api-access-in",
      "claimant": "xAI (Grok 5), per overchat.ai AI Hub tracker",
      "self_prediction": false,
      "title": "Grok 5 gets full API access in Q3 2026",
      "claim": "xAI's Grok 5 will reach full public API availability during Q3 2026 (July–September).",
      "source_url": "https://overchat.ai/ai-hub/grok-5-release-date",
      "source_quote": "Full API access is expected in Q3 2026.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-09-30",
      "resolve_criterion": "HIT if Grok 5 is generally available via the xAI API by 2026-09-30; MISS if it is still unreleased or only in closed/beta access.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "microsoft-reported-by-digit-build-2026-preview-microsoft-copilot-super-a",
      "url": "https://ai.nutool.cloud/ledger/microsoft-reported-by-digit-build-2026-preview-microsoft-copilot-super-a/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-microsoft-reported-by-digit-build-2026-preview-microsoft-copilot-super-a",
      "claimant": "Microsoft (reported by Digit, Build 2026 preview)",
      "self_prediction": false,
      "title": "Microsoft Copilot 'super app' preview lands in late summer 2026",
      "claim": "Microsoft's unified Copilot 'super app' will get a public preview in late summer 2026.",
      "source_url": "https://www.digit.in/news/general/microsoft-build-2026-new-ai-models-copilot-super-app-and-what-more-to-expect.html",
      "source_quote": "The app will not be available at Build and a preview is not expected until late summer.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-09-30",
      "resolve_criterion": "HIT if a public preview of the consolidated Copilot super app is available by 2026-09-30; MISS if no such preview has shipped by then.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "apple-reported-by-creative-bloq-wwdc-2026-coverage-revamped-ai-siri-reac",
      "url": "https://ai.nutool.cloud/ledger/apple-reported-by-creative-bloq-wwdc-2026-coverage-revamped-ai-siri-reac/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-apple-reported-by-creative-bloq-wwdc-2026-coverage-revamped-ai-siri-reac",
      "claimant": "Apple (reported by Creative Bloq, WWDC 2026 coverage)",
      "self_prediction": false,
      "title": "Revamped AI Siri reaches stable release in September 2026 with iPhone 18",
      "claim": "Apple's rebuilt AI-powered Siri will ship in a stable (non-beta) release in September 2026 alongside the iPhone 18.",
      "source_url": "https://www.creativebloq.com/live/news/tech/apple-wwdc-june-2026",
      "source_quote": "A public beta will launch in July with the stable launch coming in September alongside the expected release of iPhone 18.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-09-30",
      "resolve_criterion": "HIT if the new AI Siri is available in a stable, non-beta OS release on or before 2026-09-30; MISS if it remains beta-only or slips past September.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "anthropic-via-launch-benchmarks-anthropic-claims-claude-fable-5-scores-8",
      "url": "https://ai.nutool.cloud/ledger/anthropic-via-launch-benchmarks-anthropic-claims-claude-fable-5-scores-8/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-anthropic-via-launch-benchmarks-anthropic-claims-claude-fable-5-scores-8",
      "claimant": "Anthropic (via launch benchmarks)",
      "self_prediction": false,
      "title": "Anthropic claims Claude Fable 5 scores 80.3% on SWE-bench Pro",
      "claim": "Anthropic's launch benchmarks report Claude Fable 5 at 80.3% on SWE-bench Pro, a figure an independent eval can test.",
      "source_url": "https://www.morphllm.com/swe-bench-pro",
      "source_quote": "Claude Fable 5 launch table (June 9, 2026): Claude Fable 5 80.3%",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-09-30",
      "resolve_criterion": "HIT if an independent (non-vendor) SWE-bench Pro evaluation of Claude Fable 5 lands at or above 75% by the resolve date; MISS if independent runs come in materially lower or none confirm it.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "nxcode-reporting-xai-s-published-grok-5-roadmap-grok-5-reaches-full-api-",
      "url": "https://ai.nutool.cloud/ledger/nxcode-reporting-xai-s-published-grok-5-roadmap-grok-5-reaches-full-api-/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-nxcode-reporting-xai-s-published-grok-5-roadmap-grok-5-reaches-full-api-",
      "claimant": "NxCode (reporting xAI's published Grok 5 roadmap)",
      "self_prediction": false,
      "title": "Grok 5 reaches full API access in Q3 2026",
      "claim": "xAI's Grok 5 will reach full public API access during Q3 2026 (by Sept 30, 2026).",
      "source_url": "https://www.nxcode.io/resources/news/grok-5-release-date-latest-news-2026",
      "source_quote": "Full API access — Q3 2026",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-16",
      "resolve_date": "2026-09-30",
      "resolve_criterion": "HIT if Grok 5's API is publicly generally available by 2026-09-30; MISS if it remains in training/closed beta or unreleased.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "digitalapplied-open-weight-h1-2026-retrospective-forecast-deepseek-v4-pr",
      "url": "https://ai.nutool.cloud/ledger/digitalapplied-open-weight-h1-2026-retrospective-forecast-deepseek-v4-pr/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-digitalapplied-open-weight-h1-2026-retrospective-forecast-deepseek-v4-pr",
      "claimant": "DigitalApplied (open-weight H1 2026 retrospective forecast)",
      "self_prediction": false,
      "title": "DeepSeek V4 Preview becomes full GA in Q3 2026",
      "claim": "DeepSeek's V4 Preview will reach a full general-availability (GA) release during Q3 2026.",
      "source_url": "https://www.digitalapplied.com/blog/open-weight-models-h1-2026-retrospective-deepseek-qwen-llama",
      "source_quote": "the Preview becomes a full V4 GA in Q3, with at least one efficiency-focused revision before year-end.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-16",
      "resolve_date": "2026-09-30",
      "resolve_criterion": "HIT if DeepSeek ships a full (non-preview) V4 GA release by 2026-09-30; MISS if V4 remains in preview or unreleased.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "air-street-press-state-of-ai-may-2026-next-end-to-end-ai-cyber-capabilit",
      "url": "https://ai.nutool.cloud/ledger/air-street-press-state-of-ai-may-2026-next-end-to-end-ai-cyber-capabilit/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-air-street-press-state-of-ai-may-2026-next-end-to-end-ai-cyber-capabilit",
      "claimant": "Air Street Press (State of AI, May 2026)",
      "self_prediction": false,
      "title": "Next end-to-end AI cyber-capability result lands inside Q3 2026",
      "claim": "A new frontier-model end-to-end autonomous cyber/penetration-test capability result will be reported during Q3 2026.",
      "source_url": "https://press.airstreet.com/p/state-of-ai-may-2026",
      "source_quote": "The 'doubling every four months' finding implies the next end-to-end cyber result lands inside Q3.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-16",
      "resolve_date": "2026-09-30",
      "resolve_criterion": "HIT if a public report (e.g., AISI) documents a new end-to-end autonomous cyber-range completion by a frontier model during Q3 2026; MISS if none is published in that window.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    },
    {
      "id": "openai-leaderboard-reported-gpt-5-3-codex-reported-at-85-on-swe-bench-ve",
      "url": "https://ai.nutool.cloud/ledger/openai-leaderboard-reported-gpt-5-3-codex-reported-at-85-on-swe-bench-ve/",
      "anchor": "https://ai.nutool.cloud/ledger/#c-openai-leaderboard-reported-gpt-5-3-codex-reported-at-85-on-swe-bench-ve",
      "claimant": "OpenAI (leaderboard-reported)",
      "self_prediction": false,
      "title": "GPT-5.3 Codex reported at 85% on SWE-bench Verified",
      "claim": "GPT-5.3 Codex achieves approximately 85% on SWE-bench Verified, a number an independent eval can confirm or refute.",
      "source_url": "https://www.codeant.ai/blogs/swe-bench-scores",
      "source_quote": "Claude Mythos Preview leads at 93.9%, followed by GPT-5.3 Codex at 85% and Claude Opus 4.5 at 80.9%.",
      "label": "AS-REPORTED",
      "outcome": "OPEN",
      "confidence": null,
      "logged_date": "2026-06-14",
      "resolve_date": "2026-10-15",
      "resolve_criterion": "HIT if an independent SWE-bench Verified run of GPT-5.3 Codex falls within ~3 points of 85% by the resolve date; MISS if it deviates substantially or no independent confirmation exists.",
      "graded_date": null,
      "evidence": null,
      "reproduction": null
    }
  ]
}
