Last active at Unix timestamp 1730886079 (≈ 2024-11-06 UTC)

hn_categorizer.py — original file
"""
LangChain Hacker News Story Categorizer Tutorial

This script demonstrates key LangChain concepts by building a simple application
that fetches Hacker News stories and categorizes them using OpenAI's GPT model.

Key LangChain Concepts Demonstrated:
1. Chains: Sequences of operations that can be combined
2. Prompts: Structured way to interact with LLMs
3. LLMs: Language Model integration
4. Pydantic Output Parsing: Type-safe structured output handling
"""
13
14import os
15import requests
16from typing import List, Dict
17from rich.console import Console
18from rich.panel import Panel
19from rich.table import Table
20from rich import print as rprint
21from pydantic import BaseModel, Field
22
23from langchain.chains import LLMChain
24from langchain.chat_models import ChatOpenAI
25from langchain.prompts import ChatPromptTemplate
26from langchain.output_parsers import PydanticOutputParser
27
# Initialize Rich console for pretty output
console = Console()

# OpenAI API key: prefer a key already exported in the environment (e.g. via
# the shell or a .env file loaded with python-dotenv) and only fall back to
# the placeholder. setdefault avoids clobbering a real key that is already
# set, and keeps real secrets out of source control.
os.environ.setdefault("OPENAI_API_KEY", "YOUR-OPENAI-API-KEY")  # Replace the placeholder or export the variable
33
# Define our Pydantic model for structured output
class StoryAnalysis(BaseModel):
    """
    Structured result of analyzing one Hacker News story.

    Declaring the output schema as a Pydantic model gives us validation and
    type-safe attribute access on the parsed LLM response.
    """

    # The Field descriptions are surfaced to the LLM through the output
    # parser's format instructions, so they double as prompt documentation.
    category: str = Field(description="The main category of the story (Tech, Business, Science, etc.)")
    subcategory: str = Field(description="A more specific subcategory")
    summary: str = Field(description="A brief 1-2 sentence summary of the story's main points")
43
def fetch_hn_stories(limit: int = 5) -> List[Dict]:
    """
    Fetch the top `limit` stories from the Hacker News API.

    This function demonstrates basic API interaction outside of LangChain;
    later we combine it with LangChain components.

    Args:
        limit: Maximum number of top stories to return.

    Returns:
        A list of dicts with 'title', 'url' and 'score' keys. Deleted/dead
        items (which the API returns without a title) are skipped, so fewer
        than `limit` entries may come back.

    Raises:
        requests.HTTPError: If the HN API responds with an error status.
        requests.Timeout: If a request exceeds the 10-second timeout.
    """
    # Get the IDs of the current top stories. A timeout prevents the script
    # from hanging indefinitely on a stalled connection, and raise_for_status
    # surfaces HTTP errors before we try to decode the body as JSON.
    response = requests.get(
        "https://hacker-news.firebaseio.com/v0/topstories.json", timeout=10
    )
    response.raise_for_status()
    story_ids = response.json()[:limit]

    stories = []
    for story_id in story_ids:
        # Each story must be fetched individually by its numeric ID.
        story_url = f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json"
        story_response = requests.get(story_url, timeout=10)
        story_response.raise_for_status()
        story_data = story_response.json()

        # The API returns null for deleted items; skip anything untitled.
        if story_data and 'title' in story_data:
            stories.append({
                'title': story_data['title'],
                'url': story_data.get('url', ''),
                'score': story_data.get('score', 0)
            })

    return stories
70
def setup_langchain_categorizer():
    """
    Build the LangChain pieces used to categorize stories.

    Demonstrates three core LangChain concepts:
      1. PydanticOutputParser -- type-safe structured output
      2. ChatPromptTemplate   -- reusable, parameterized prompts
      3. LLMChain             -- glue that runs a prompt through a model

    Returns:
        A (chain, output_parser) tuple: the LLMChain to run per story, and
        the parser that turns its raw text output into a StoryAnalysis.
    """
    # Parser that converts the raw LLM response into our StoryAnalysis model.
    # This is a more modern approach than using ResponseSchema.
    parser = PydanticOutputParser(pydantic_object=StoryAnalysis)

    # Reusable prompt; {format_instructions} is filled in at call time from
    # the parser, so the LLM knows the exact output schema to produce.
    template = ChatPromptTemplate.from_template("""
    Analyze the following Hacker News story and provide a categorization and summary.

    Story Title: {title}
    URL: {url}

    {format_instructions}

    Provide your analysis in the exact format specified above:
    """)

    # ChatOpenAI is LangChain's wrapper around OpenAI's chat models.
    # temperature=0 keeps the categorization deterministic.
    model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

    # LLMChain is the basic building block that ties the prompt to the model.
    categorizer_chain = LLMChain(llm=model, prompt=template)

    return categorizer_chain, parser
107
def display_results(stories: List[Dict], categorized_results: List[StoryAnalysis]):
    """
    Render the analyzed stories as a Rich table.

    The categorized_results entries are StoryAnalysis Pydantic models, so
    their fields are read with attribute access rather than dict lookups.

    Args:
        stories: Raw story dicts fetched from Hacker News.
        categorized_results: One StoryAnalysis per story, in the same order.
    """
    results_table = Table(
        title="Hacker News Stories Analysis",
        show_header=True,
        header_style="bold magenta",
    )
    # Column layout: (heading, style, no_wrap).
    for heading, style, no_wrap in (
        ("Title", "cyan", False),
        ("Category", "green", True),
        ("Subcategory", "yellow", True),
        ("Summary", "white", False),
    ):
        results_table.add_column(heading, style=style, no_wrap=no_wrap)

    for story, analysis in zip(stories, categorized_results):
        # analysis is a Pydantic model, hence the dot notation.
        results_table.add_row(
            story['title'],
            analysis.category,
            analysis.subcategory,
            analysis.summary,
        )

    console.print()
    console.print(Panel.fit(
        "🚀 LangChain Hacker News Analyzer",
        subtitle="Analyzing top stories using LangChain and GPT-3.5",
        style="bold blue",
    ))
    console.print()
    console.print(results_table)
    console.print()
138
def main():
    """
    Entry point: fetch, categorize, and display Hacker News stories.

    Pipeline:
      1. Fetch the top stories from the HN API.
      2. Build the LangChain chain and output parser.
      3. Run each story through the chain and parse the result.
      4. Render everything in a Rich table.
    """
    # Welcome banner.
    console.print(Panel.fit(
        "Fetching and analyzing Hacker News stories...",
        style="bold green",
    ))

    stories = fetch_hn_stories(limit=5)
    chain, output_parser = setup_langchain_categorizer()

    # The format instructions are identical for every story, so fetch them
    # once rather than per iteration.
    format_instructions = output_parser.get_format_instructions()

    categorized_results = []
    with console.status("[bold green]Processing stories..."):
        for story in stories:
            # Run the chain, then parse its raw text into a StoryAnalysis.
            raw_output = chain.run(
                title=story['title'],
                url=story['url'],
                format_instructions=format_instructions,
            )
            categorized_results.append(output_parser.parse(raw_output))

    display_results(stories, categorized_results)
181
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
requirements.txt — original file
langchain==0.0.340
openai==1.3.7
python-dotenv==1.0.0
requests==2.31.0
rich==13.7.0
beautifulsoup4==4.12.2