aws-samples · mnmcho · Apr 2, 2024 · Apr 25, 2024 · Apr 25, 2024
diff --git a/01_Text_generation/01_code_generation_w_bedrock.ipynb b/01_Text_generation/01_code_generation_w_bedrock.ipynb
@@ -100,7 +100,9 @@
  "cell_type": "code",
  "execution_count": null,
  "id": "89a0ad24",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
  "outputs": [],
  "source": [
  "# create sales.csv file\n",
@@ -178,7 +180,7 @@
  "- The date with the highest revenue\n",
  "- Visualize monthly sales using a bar chart\n",
  "\n",
- "Ensure the code is syntactically correct, bug-free, optimized, not span multiple lines unnessarily, and prefer to use standard libraries. Return only python code without any surrounding text, explanation or context.\n",
+ "Ensure the code is syntactically correct, bug-free, optimized, does not span multiple lines unnecessarily, and prefers standard libraries that come with Python. Return only python code without any surrounding text, explanation or context.\n",
  "\n",
  "Assistant:\n",
  "\"\"\""
@@ -189,7 +191,7 @@
  "id": "cc9784e5-5e9d-472d-8ef1-34108ee4968b",
  "metadata": {},
  "source": [
- "Let's start by using the Anthropic Claude V2 model."
+ "Let's start by using the **Anthropic Claude V2** model."
  ]
  },
  {
@@ -224,7 +226,9 @@
  "cell_type": "code",
  "execution_count": null,
  "id": "016a118a",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
  "outputs": [],
  "source": [
  "modelId = 'anthropic.claude-v2' # change this to use a different version from the model provider\n",
@@ -234,7 +238,9 @@
  "response = boto3_bedrock.invoke_model(body=body, modelId=modelId, accept=accept, contentType=contentType)\n",
  "response_body = json.loads(response.get('body').read())\n",
  "\n",
- "response_body.get('completion')"
+ "response_text = response_body.get('completion')\n",
+ "\n",
+ "print(response_text)"
  ]
  },
  {
@@ -249,52 +255,177 @@
  "cell_type": "code",
  "execution_count": null,
  "id": "77d9b428",
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
  "outputs": [],
  "source": [
- "# Sample Generated Python Code ( Generated with Amazon Bedrock in previous step)\n",
+ "# Sample Generated Python Code (Generated with Amazon Bedrock in previous step)\n",
  "\n",
  "import csv\n",
  "from collections import defaultdict\n",
  "import matplotlib.pyplot as plt\n",
  "\n",
  "revenue = 0\n",
- "monthly_revenue = defaultdict(int)\n",
- "product_revenue = defaultdict(int)\n",
  "max_revenue = 0\n",
- "max_revenue_date = ''\n",
- "max_revenue_product = ''\n",
+ "max_revenue_product = None\n",
+ "max_revenue_date = None\n",
+ "monthly_sales = defaultdict(int)\n",
  "\n",
  "with open('sales.csv') as f:\n",
  " reader = csv.reader(f)\n",
- " next(reader)\n",
+ " next(reader) # skip header\n",
  " for row in reader:\n",
- " date = row[0]\n",
- " product = row[1]\n",
- " price = float(row[2])\n",
- " units = int(row[3])\n",
- "\n",
- " revenue += price * units\n",
- " product_revenue[product] += price * units\n",
- " monthly_revenue[date[:7]] += price * units\n",
- "\n",
- " if revenue > max_revenue:\n",
- " max_revenue = revenue\n",
- " max_revenue_date = date\n",
+ " date, product, price, units = row\n",
+ " revenue += float(price) * int(units)\n",
+ " product_revenue = float(price) * int(units)\n",
+ " if product_revenue > max_revenue:\n",
+ " max_revenue = product_revenue\n",
  " max_revenue_product = product\n",
+ " max_revenue_date = date\n",
+ " month = date.split('-')[1]\n",
+ " monthly_sales[month] += product_revenue\n",
+ " \n",
+ "print(f'Total revenue: {revenue}') \n",
+ "print(f'Product with highest revenue: {max_revenue_product}')\n",
+ "print(f'Date with highest revenue: {max_revenue_date}')\n",
  "\n",
- "months = list(monthly_revenue.keys())\n",
- "values = list(monthly_revenue.values())\n",
- "\n",
- "plt.bar(months, values)\n",
+ "plt.bar(monthly_sales.keys(), monthly_sales.values())\n",
  "plt.xlabel('Month')\n",
  "plt.ylabel('Revenue')\n",
  "plt.title('Monthly Revenue')\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a9464363",
+ "metadata": {},
+ "source": [
+ "Now, let's try an **Anthropic Claude V3** model. We will use Claude 3 Sonnet here.\n",
+ "\n",
+ "For Claude v3, the [Messages API](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html) must be used instead of the [Text Completions API](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-text-completion.html) that is used in the Claude v2 example above. Claude models are trained to operate on alternating user and assistant conversational turns, so each input message must be an object with a role and content. The input can be a single user-role message, or it can be multiple user and assistant messages.\n",
+ "\n",
+ "Messages API is designed to be backward compatible, so it works with Claude 3 models (Haiku, Sonnet, Opus) as well as Claude 2 models (v2, v2.1) and Claude Instant v1.2.\n",
+ "\n",
+ "This is why we define the `messages` variable and include the `messages` parameter in the request body in the following cell."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4d9f2582-3275-4609-86f7-4c660e9c0303",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "messages = [{\"role\": \"user\", \"content\": prompt_data}]\n",
+ "\n",
+ "body = json.dumps({\n",
+ " \"anthropic_version\": \"bedrock-2023-05-31\",\n",
+ " \"max_tokens\": 4096,\n",
+ " \"messages\": messages,\n",
+ " \"temperature\": 0.5,\n",
+ " \"top_k\": 250,\n",
+ " \"top_p\": 0.5,\n",
+ " \"stop_sequences\": [\"\\n\\nHuman:\"]\n",
+ " })"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "505708ec",
+ "metadata": {},
+ "source": [
+ "#### Invoke the Anthropic Claude v3 model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4337ec93",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "modelId = 'anthropic.claude-3-sonnet-20240229-v1:0' # change this to use a different version from the model provider\n",
+ "\n",
+ "response = boto3_bedrock.invoke_model(body=body, modelId=modelId)\n",
+ "response_body = json.loads(response.get('body').read())\n",
+ "\n",
+ "response_text = response_body.get('content')[0]['text']\n",
+ "\n",
+ "print(response_text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0885f663",
+ "metadata": {},
+ "source": [
+ "#### (Optional) Execute the Bedrock generated code for validation. Go to a text editor to copy the generated code, as the printed output can be truncated. Replace the code in the cell below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "465755c3",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Sample Generated Python Code (Generated with Amazon Bedrock in previous step)\n",
+ "\n",
+ "import csv\n",
+ "from collections import defaultdict\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Read data from CSV file\n",
+ "data = []\n",
+ "with open('sales.csv', 'r') as file:\n",
+ " reader = csv.DictReader(file)\n",
+ " for row in reader:\n",
+ " data.append(row)\n",
+ "\n",
+ "# Calculate total revenue\n",
+ "total_revenue = sum(float(row['price']) * float(row['units_sold']) for row in data)\n",
+ "\n",
+ "# Find product with highest revenue\n",
+ "product_revenue = defaultdict(float)\n",
+ "for row in data:\n",
+ " product_revenue[row['product_id']] += float(row['price']) * float(row['units_sold'])\n",
+ "highest_revenue_product = max(product_revenue, key=product_revenue.get)\n",
+ "\n",
+ "# Find date with highest revenue\n",
+ "date_revenue = defaultdict(float)\n",
+ "for row in data:\n",
+ " date_revenue[row['date']] += float(row['price']) * float(row['units_sold'])\n",
+ "highest_revenue_date = max(date_revenue, key=date_revenue.get)\n",
+ "\n",
+ "# Visualize monthly sales\n",
+ "monthly_sales = defaultdict(float)\n",
+ "for row in data:\n",
+ " year, month, _ = row['date'].split('-')\n",
+ " monthly_sales[f\"{year}-{month}\"] += float(row['units_sold'])\n",
+ "\n",
+ "months = sorted(monthly_sales.keys())\n",
+ "units_sold = [monthly_sales[month] for month in months]\n",
+ "\n",
+ "plt.figure(figsize=(10, 6))\n",
+ "plt.bar(months, units_sold)\n",
+ "plt.xlabel('Month')\n",
+ "plt.ylabel('Units Sold')\n",
+ "plt.title('Monthly Sales')\n",
+ "plt.xticks(rotation=90)\n",
+ "plt.tight_layout()\n",
  "plt.show()\n",
  "\n",
- "print('Total Revenue:', revenue)\n",
- "print('Product with max revenue:', max_revenue_product)\n",
- "print('Date with max revenue:', max_revenue_date)"
+ "print(f\"Total revenue: ${total_revenue:.2f}\")\n",
+ "print(f\"Product with highest revenue: {highest_revenue_product}\")\n",
+ "print(f\"Date with highest revenue: {highest_revenue_date}\")"
  ]
  },
  {
@@ -428,7 +559,7 @@
  {
  "cell_type": "code",
  "execution_count": null,
- "id": "fe7ae02d-4b4e-470a-bfbe-9cbdd30b8db6",
+ "id": "3289c9d3-ea79-4739-aa6b-4ad2390951b1",
  "metadata": {},
  "outputs": [],
  "source": []
@@ -1042,9 +1173,9 @@
  ],
  "instance_type": "ml.t3.medium",
  "kernelspec": {
- "display_name": "Python 3 (Data Science 3.0)",
+ "display_name": "conda_python3",
  "language": "python",
- "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-310-v1"
+ "name": "conda_python3"
  },
  "language_info": {
  "codemirror_mode": {
@@ -1056,7 +1187,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.10.6"
+ "version": "3.10.13"
  },
  "vscode": {
  "interpreter": {