openhands revised this gist. Go to revision
2 files changed, 189 insertions
hn_categorizer.py(file created)
| @@ -0,0 +1,183 @@ | |||
| 1 | + | """ | |
| 2 | + | LangChain Hacker News Story Categorizer Tutorial | |
| 3 | + | ||
| 4 | + | This script demonstrates key LangChain concepts by building a simple application | |
| 5 | + | that fetches Hacker News stories and categorizes them using OpenAI's GPT model. | |
| 6 | + | ||
| 7 | + | Key LangChain Concepts Demonstrated: | |
| 8 | + | 1. Chains: Sequences of operations that can be combined | |
| 9 | + | 2. Prompts: Structured way to interact with LLMs | |
| 10 | + | 3. LLMs: Language Model integration | |
| 11 | + | 4. Pydantic Output Parsing: Type-safe structured output handling | |
| 12 | + | """ | |
| 13 | + | ||
| 14 | + | import os | |
| 15 | + | import requests | |
| 16 | + | from typing import List, Dict | |
| 17 | + | from rich.console import Console | |
| 18 | + | from rich.panel import Panel | |
| 19 | + | from rich.table import Table | |
| 20 | + | from rich import print as rprint | |
| 21 | + | from pydantic import BaseModel, Field | |
| 22 | + | ||
| 23 | + | from langchain.chains import LLMChain | |
| 24 | + | from langchain.chat_models import ChatOpenAI | |
| 25 | + | from langchain.prompts import ChatPromptTemplate | |
| 26 | + | from langchain.output_parsers import PydanticOutputParser | |
| 27 | + | ||
# Initialize Rich console for pretty output
console = Console()

# OpenAI API key: respect a key already present in the environment (e.g. set
# via a shell export or python-dotenv) and only fall back to the placeholder.
# SECURITY NOTE: never commit a real API key in source code -- load it from
# the environment instead. Replace the placeholder only for local testing.
os.environ.setdefault("OPENAI_API_KEY", "YOUR-OPENAI-API-KEY")
| 33 | + | ||
# Define our Pydantic model for structured output
class StoryAnalysis(BaseModel):
    """
    Pydantic model for story analysis output.

    Using Pydantic provides type safety and validation: the
    PydanticOutputParser derives its format instructions from this schema
    and validates the LLM's reply against it when parsing.
    """
    # Broad topic bucket chosen by the model, e.g. "Tech" or "Science".
    category: str = Field(description="The main category of the story (Tech, Business, Science, etc.)")
    # Finer-grained label within the chosen category.
    subcategory: str = Field(description="A more specific subcategory")
    # Short natural-language summary produced by the model.
    summary: str = Field(description="A brief 1-2 sentence summary of the story's main points")
| 43 | + | ||
def fetch_hn_stories(limit: int = 5) -> List[Dict]:
    """
    Fetch the current top stories from the Hacker News Firebase API.

    This function demonstrates basic API interaction outside of LangChain.
    Later, we'll combine this with LangChain components.

    Args:
        limit: Maximum number of stories to fetch (default 5).

    Returns:
        A list of dicts with keys 'title', 'url' and 'score'. Items without
        a title are skipped; 'url' defaults to "" and 'score' to 0 when
        absent (e.g. Ask HN posts have no external URL).

    Raises:
        requests.RequestException: if the top-stories request itself fails.
    """
    # Get top story IDs. A timeout keeps the script from hanging forever on
    # network problems, and raise_for_status surfaces HTTP errors early.
    response = requests.get(
        "https://hacker-news.firebaseio.com/v0/topstories.json", timeout=10
    )
    response.raise_for_status()
    story_ids = response.json()[:limit]

    stories = []
    for story_id in story_ids:
        # Fetch individual story details (one request per story).
        story_url = f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json"
        try:
            story_response = requests.get(story_url, timeout=10)
            story_response.raise_for_status()
            story_data = story_response.json()
        except requests.RequestException:
            # Best-effort: skip a story that fails to load rather than
            # aborting the entire run.
            continue

        if story_data and 'title' in story_data:
            stories.append({
                'title': story_data['title'],
                'url': story_data.get('url', ''),
                'score': story_data.get('score', 0)
            })

    return stories
| 70 | + | ||
def setup_langchain_categorizer():
    """
    Build the LangChain pieces used to categorize a story.

    Demonstrates three key LangChain concepts:
    1. PydanticOutputParser -- type-safe structured output
    2. ChatPromptTemplate   -- reusable, parameterized prompts
    3. LLMChain             -- glue that runs a prompt through a model

    Returns:
        A (chain, output_parser) tuple: run the chain to get raw text from
        the LLM, then feed that text to the parser for a StoryAnalysis.
    """
    # The parser derives JSON format instructions from the StoryAnalysis
    # schema and later validates the model's reply against it.
    parser = PydanticOutputParser(pydantic_object=StoryAnalysis)

    # Reusable prompt with three template variables: {title}, {url} and
    # {format_instructions} (filled in at run time).
    template = ChatPromptTemplate.from_template("""
    Analyze the following Hacker News story and provide a categorization and summary.

    Story Title: {title}
    URL: {url}

    {format_instructions}

    Provide your analysis in the exact format specified above:
    """)

    # Deterministic output (temperature=0) makes structured parsing reliable.
    model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

    # LLMChain wires the prompt template to the chat model.
    return LLMChain(llm=model, prompt=template), parser
| 107 | + | ||
def display_results(stories: List[Dict], categorized_results: List[StoryAnalysis]):
    """
    Render the analysis results as a Rich table.

    The categorized_results are strongly typed thanks to the Pydantic model,
    so fields are read with dot notation rather than dict indexing.

    Args:
        stories: Raw story dicts from fetch_hn_stories (only 'title' is used).
        categorized_results: One StoryAnalysis per story, in the same order.
    """
    results_table = Table(
        title="Hacker News Stories Analysis",
        show_header=True,
        header_style="bold magenta",
    )
    # (header, style, no_wrap) per column; titles and summaries may wrap.
    column_specs = [
        ("Title", "cyan", False),
        ("Category", "green", True),
        ("Subcategory", "yellow", True),
        ("Summary", "white", False),
    ]
    for header, style, fixed in column_specs:
        results_table.add_column(header, style=style, no_wrap=fixed)

    # Pair each raw story with its analysis row by row.
    for story, analysis in zip(stories, categorized_results):
        results_table.add_row(
            story['title'],
            analysis.category,
            analysis.subcategory,
            analysis.summary,
        )

    console.print()
    console.print(Panel.fit(
        "🚀 LangChain Hacker News Analyzer",
        subtitle="Analyzing top stories using LangChain and GPT-3.5",
        style="bold blue",
    ))
    console.print()
    console.print(results_table)
    console.print()
| 138 | + | ||
def main():
    """
    Run the HN story categorizer end to end.

    Pipeline:
    1. Fetch stories from the HN API
    2. Set up LangChain components
    3. Process each story through the LLM chain
    4. Display results
    """
    # Show a welcome message.
    console.print(Panel.fit(
        "Fetching and analyzing Hacker News stories...",
        style="bold green"
    ))

    # Fetch stories.
    stories = fetch_hn_stories(limit=5)

    # Setup LangChain components.
    chain, output_parser = setup_langchain_categorizer()

    # The format instructions depend only on the StoryAnalysis schema, so
    # compute them once instead of once per story inside the loop.
    format_instructions = output_parser.get_format_instructions()

    # Process each story (spinner shown while the LLM calls run).
    categorized_results = []
    with console.status("[bold green]Processing stories..."):
        for story in stories:
            # Run the chain: fills the prompt template and calls the LLM.
            result = chain.run(
                title=story['title'],
                url=story['url'],
                format_instructions=format_instructions
            )

            # Validate and convert the raw LLM text into a StoryAnalysis.
            categorized_results.append(output_parser.parse(result))

    # Display results.
    display_results(stories, categorized_results)
| 181 | + | ||
# Standard entry-point guard: run the pipeline only when this file is
# executed as a script, not when it is imported as a module.
if __name__ == "__main__":
    main()
requirements.txt(file created)
| @@ -0,0 +1,6 @@ | |||
| 1 | + | langchain==0.0.340 | |
| 2 | + | openai==1.3.7 | |
| 3 | + | python-dotenv==1.0.0 | |
| 4 | + | requests==2.31.0 | |
| 5 | + | rich==13.7.0 | |
| 6 | + | beautifulsoup4==4.12.2 | |
Newer
Older