diff --git a/AmazonBedrock/anthropic/08_Avoiding_Hallucinations.ipynb b/AmazonBedrock/anthropic/08_Avoiding_Hallucinations.ipynb index 8bf0e68..7fefd59 100755 --- a/AmazonBedrock/anthropic/08_Avoiding_Hallucinations.ipynb +++ b/AmazonBedrock/anthropic/08_Avoiding_Hallucinations.ipynb @@ -207,9 +207,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "How do we fix this? Well, a great way to reduce hallucinations on long documents is to **make Claude gather evidence first.** \n", + "How do we fix this? In practice, the strongest version of this pattern uses **two separate safeguards together**:\n", "\n", - "In this case, we **tell Claude to first extract relevant quotes, then base its answer on those quotes**. Telling Claude to do so here makes it correctly notice that the quote does not answer the question." + "1. **Evidence gathering**: have Claude pull the most relevant quote before answering.\n", + "2. **Information-sufficiency check**: have Claude explicitly decide whether that quote actually answers the question or whether it still lacks the needed detail.\n", + "\n", + "The prompt below intentionally combines both. That matters because they do different jobs:\n", + "- The quote extraction step improves grounding and makes the model show its work.\n", + "- The sufficiency check gives Claude permission to say that the document does **not** fully answer the question, instead of guessing from a nearby distractor.\n", + "\n", + "If you only ask for quotes, Claude can still over-interpret a near match. If you only ask whether the document is sufficient, Claude is less likely to hallucinate, but you lose the explicit evidence trail. **Using both together is the most reliable teaching example here.**\n" ] }, { @@ -376,8 +383,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Exercise 8.1 - Prospectus Hallucination\n", - "Modify the `PROMPT` to fix Claude's hallucination issue by asking for citations. 
The correct answer is that subscribers went up 49x." + "### Exercise 8.2 - Prospectus Hallucination\n", + "Modify the `PROMPT` to fix Claude's hallucination issue by asking for citations **and** telling it to check whether those citations actually support the final answer. The correct answer is that subscribers went up 49x.\n" ] }, { diff --git a/AmazonBedrock/boto3/08_Avoiding_Hallucinations.ipynb b/AmazonBedrock/boto3/08_Avoiding_Hallucinations.ipynb index a303f1b..62a8712 100755 --- a/AmazonBedrock/boto3/08_Avoiding_Hallucinations.ipynb +++ b/AmazonBedrock/boto3/08_Avoiding_Hallucinations.ipynb @@ -213,9 +213,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "How do we fix this? Well, a great way to reduce hallucinations on long documents is to **make Claude gather evidence first.** \n", + "How do we fix this? In practice, the strongest version of this pattern uses **two separate safeguards together**:\n", "\n", - "In this case, we **tell Claude to first extract relevant quotes, then base its answer on those quotes**. Telling Claude to do so here makes it correctly notice that the quote does not answer the question." + "1. **Evidence gathering**: have Claude pull the most relevant quote before answering.\n", + "2. **Information-sufficiency check**: have Claude explicitly decide whether that quote actually answers the question or whether it still lacks the needed detail.\n", + "\n", + "The prompt below intentionally combines both. That matters because they do different jobs:\n", + "- The quote extraction step improves grounding and makes the model show its work.\n", + "- The sufficiency check gives Claude permission to say that the document does **not** fully answer the question, instead of guessing from a nearby distractor.\n", + "\n", + "If you only ask for quotes, Claude can still over-interpret a near match. If you only ask whether the document is sufficient, Claude is less likely to hallucinate, but you lose the explicit evidence trail. 
**Using both together is the most reliable teaching example here.**\n" ] }, { @@ -382,8 +389,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Exercise 8.1 - Prospectus Hallucination\n", - "Modify the `PROMPT` to fix Claude's hallucination issue by asking for citations. The correct answer is that subscribers went up 49x." + "### Exercise 8.2 - Prospectus Hallucination\n", + "Modify the `PROMPT` to fix Claude's hallucination issue by asking for citations **and** telling it to check whether those citations actually support the final answer. The correct answer is that subscribers went up 49x.\n" ] }, { diff --git a/Anthropic 1P/08_Avoiding_Hallucinations.ipynb b/Anthropic 1P/08_Avoiding_Hallucinations.ipynb index 7e3df42..5b12bf6 100644 --- a/Anthropic 1P/08_Avoiding_Hallucinations.ipynb +++ b/Anthropic 1P/08_Avoiding_Hallucinations.ipynb @@ -201,9 +201,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "How do we fix this? Well, a great way to reduce hallucinations on long documents is to **make Claude gather evidence first.** \n", + "How do we fix this? In practice, the strongest version of this pattern uses **two separate safeguards together**:\n", "\n", - "In this case, we **tell Claude to first extract relevant quotes, then base its answer on those quotes**. Telling Claude to do so here makes it correctly notice that the quote does not answer the question." + "1. **Evidence gathering**: have Claude pull the most relevant quote before answering.\n", + "2. **Information-sufficiency check**: have Claude explicitly decide whether that quote actually answers the question or whether it still lacks the needed detail.\n", + "\n", + "The prompt below intentionally combines both. 
That matters because they do different jobs:\n", + "- The quote extraction step improves grounding and makes the model show its work.\n", + "- The sufficiency check gives Claude permission to say that the document does **not** fully answer the question, instead of guessing from a nearby distractor.\n", + "\n", + "If you only ask for quotes, Claude can still over-interpret a near match. If you only ask whether the document is sufficient, Claude is less likely to hallucinate, but you lose the explicit evidence trail. **Using both together is the most reliable teaching example here.**\n" ] }, { @@ -370,8 +377,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Exercise 8.1 - Prospectus Hallucination\n", - "Modify the `PROMPT` to fix Claude's hallucination issue by asking for citations. The correct answer is that subscribers went up 49x." + "### Exercise 8.2 - Prospectus Hallucination\n", + "Modify the `PROMPT` to fix Claude's hallucination issue by asking for citations **and** telling it to check whether those citations actually support the final answer. 
#!/usr/bin/env python3
"""Verify that the chapter 8 notebooks contain the updated hallucination-lesson wording.

Scans the three 08_Avoiding_Hallucinations notebooks for the required lesson
fragments and the updated exercise wording. Exits with status 1 (after printing
one line per problem) if any notebook is out of date.
"""
import json
import sys
from pathlib import Path

# Repository root: this script lives in <root>/scripts/.
ROOT = Path(__file__).resolve().parents[1]

# Notebook paths relative to the repository root.
NOTEBOOK_PATHS = [
    'Anthropic 1P/08_Avoiding_Hallucinations.ipynb',
    'AmazonBedrock/anthropic/08_Avoiding_Hallucinations.ipynb',
    'AmazonBedrock/boto3/08_Avoiding_Hallucinations.ipynb',
]

# Exact strings the updated lesson cell must contain (checked verbatim).
REQUIRED_LESSON_FRAGMENTS = [
    'Evidence gathering',
    'Information-sufficiency check',
    'Using both together is the most reliable teaching example here.',
]
# Exact strings the updated exercise cell must contain (checked verbatim).
EXERCISE_HEADING = '### Exercise 8.2 - Prospectus Hallucination'
EXERCISE_PHRASE = (
    'asking for citations **and** telling it to check whether those '
    'citations actually support the final answer'
)


def markdown_text(data):
    """Return the concatenated source of every markdown cell in a parsed notebook.

    Searching all markdown cells (rather than hardcoded cell indices) keeps the
    check correct even if cells are inserted or removed elsewhere in the notebook.
    """
    return '\n'.join(
        ''.join(cell.get('source', []))
        for cell in data.get('cells', [])
        if cell.get('cell_type') == 'markdown'
    )


def check_notebook(data):
    """Return a list of problem descriptions for one parsed notebook.

    An empty list means the notebook contains all required wording.
    """
    text = markdown_text(data)
    problems = []

    missing = [frag for frag in REQUIRED_LESSON_FRAGMENTS if frag not in text]
    if missing:
        problems.append(f'missing lesson fragments: {missing}')

    if EXERCISE_HEADING not in text or EXERCISE_PHRASE not in text:
        problems.append('exercise wording not updated as expected')

    return problems


def main():
    """Check every notebook; print problems and exit 1 if any check fails."""
    failed = False
    for rel_path in NOTEBOOK_PATHS:
        notebook = ROOT / rel_path
        data = json.loads(notebook.read_text(encoding='utf-8'))
        for problem in check_notebook(data):
            failed = True
            print(f'{notebook}: {problem}')

    if failed:
        sys.exit(1)

    print(f'validated {len(NOTEBOOK_PATHS)} chapter 8 notebooks')


if __name__ == '__main__':
    main()