Naposledy aktivní 1730886198

Revize ffbe131b019dad97fc200abff9aae3bbf038a4f4

hn_categorizer.py Raw
1"""
2LangChain Hacker News Story Categorizer Tutorial
3
4This script demonstrates key LangChain concepts by building a simple application
5that fetches Hacker News stories and categorizes them using OpenAI's GPT model.
6
7Key LangChain Concepts Demonstrated:
81. Chains: Sequences of operations that can be combined
92. Prompts: Structured way to interact with LLMs
103. LLMs: Language Model integration
114. Pydantic Output Parsing: Type-safe structured output handling
12"""
13
14import os
15import requests
16from typing import List, Dict
17from rich.console import Console
18from rich.panel import Panel
19from rich.table import Table
20from rich import print as rprint
21from pydantic import BaseModel, Field
22
23from langchain.chains import LLMChain
24from langchain.chat_models import ChatOpenAI
25from langchain.prompts import ChatPromptTemplate
26from langchain.output_parsers import PydanticOutputParser
27
# Module-level Rich console shared by the functions below for all
# terminal output (panels, status spinner, results table).
console = Console()
30
31# Define our Pydantic model for structured output
# Schema for the structured answer requested from the LLM for one story.
# NOTE(review): pydantic exposes the class docstring and the Field
# descriptions in the model's JSON schema, which presumably ends up in the
# format instructions sent to the model -- treat that text as prompt content
# and confirm before rewording it.
class StoryAnalysis(BaseModel):
    """
    Pydantic model for story analysis output.
    Using Pydantic provides type safety and validation.
    """
    # Broad, top-level topic of the story.
    category: str = Field(description="The main category of the story (Tech, Business, Science, etc.)")
    # Narrower topic within the category.
    subcategory: str = Field(description="A more specific subcategory")
    # Short free-text summary produced by the model.
    summary: str = Field(description="A brief 1-2 sentence summary of the story's main points")
40
def fetch_hn_stories(limit: int = 5, *, timeout: float = 10.0) -> List[Dict]:
    """
    Fetch top stories from Hacker News.

    This function demonstrates basic API interaction outside of LangChain.
    Later, we'll combine this with LangChain components.

    Args:
        limit: Maximum number of top-story IDs to look up. Zero or a negative
            value yields an empty list without touching the network.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        One dict per story that has a title, with the keys ``title``,
        ``url`` (empty string when the item has none) and ``score``
        (0 when missing). Items with an empty payload or no title are skipped.

    Raises:
        requests.RequestException: If a request times out, cannot be
            completed, or comes back with an HTTP error status.
    """
    # A negative limit would otherwise slice "[:limit]" and fetch almost the
    # entire top-stories list, which is never what the caller wants.
    if limit <= 0:
        return []

    base_url = "https://hacker-news.firebaseio.com/v0"
    stories = []

    # One session reuses the underlying connection across the per-story calls.
    with requests.Session() as session:
        # Get top story IDs
        response = session.get(f"{base_url}/topstories.json", timeout=timeout)
        response.raise_for_status()
        story_ids = (response.json() or [])[:limit]

        for story_id in story_ids:
            # Fetch individual story details
            story_response = session.get(
                f"{base_url}/item/{story_id}.json", timeout=timeout
            )
            story_response.raise_for_status()
            story_data = story_response.json()

            # The payload can be empty or lack a title; skip those items.
            if story_data and 'title' in story_data:
                stories.append({
                    'title': story_data['title'],
                    'url': story_data.get('url', ''),
                    'score': story_data.get('score', 0),
                })

    return stories
67
def setup_langchain_categorizer():
    """
    Build the LangChain pieces used to categorize a story.

    Three components are wired together here:
    1. A Pydantic output parser targeting ``StoryAnalysis``
    2. A chat prompt template with ``title``, ``url`` and
       ``format_instructions`` placeholders
    3. An ``LLMChain`` joining that prompt to a ``ChatOpenAI`` model

    Returns:
        A ``(chain, output_parser)`` tuple. The caller supplies the template
        variables when running the chain and uses the parser on its output.
    """
    # Parser that turns the model's reply into a StoryAnalysis instance
    story_parser = PydanticOutputParser(pydantic_object=StoryAnalysis)

    # Prompt text; the placeholders are filled in when the chain is run
    template_text = """
    Analyze the following Hacker News story and provide a categorization and summary.

    Story Title: {title}
    URL: {url}

    {format_instructions}

    Provide your analysis in the exact format specified above:
    """
    story_prompt = ChatPromptTemplate.from_template(template_text)

    # ChatOpenAI is a LangChain wrapper around OpenAI's chat models
    chat_model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

    # LLMChain combines the prompt with the model into one runnable unit
    return LLMChain(llm=chat_model, prompt=story_prompt), story_parser
104
def display_results(stories: List[Dict], categorized_results: List[StoryAnalysis]):
    """
    Render the analyzed stories as a Rich table on the shared console.

    Args:
        stories: Story dicts; only the ``title`` key is read here.
        categorized_results: Parsed analyses, paired with ``stories`` by
            position. Their ``category``, ``subcategory`` and ``summary``
            attributes fill the remaining columns.

    The two lists are walked together with ``zip``, so any surplus entries in
    the longer list are not shown.
    """
    results_table = Table(
        title="Hacker News Stories Analysis",
        show_header=True,
        header_style="bold magenta",
    )

    # (heading, style, no_wrap) for each column, in display order
    column_specs = (
        ("Title", "cyan", False),
        ("Category", "green", True),
        ("Subcategory", "yellow", True),
        ("Summary", "white", False),
    )
    for heading, colour, keep_on_one_line in column_specs:
        results_table.add_column(heading, style=colour, no_wrap=keep_on_one_line)

    for story, analysis in zip(stories, categorized_results):
        # Attribute access because each analysis is a Pydantic model
        results_table.add_row(
            story['title'],
            analysis.category,
            analysis.subcategory,
            analysis.summary,
        )

    banner = Panel.fit(
        "🚀 LangChain Hacker News Analyzer",
        subtitle="Analyzing top stories using LangChain and GPT-3.5",
        style="bold blue",
    )

    # Blank line, banner, blank line, table, blank line
    console.print()
    for renderable in (banner, results_table):
        console.print(renderable)
        console.print()
135
def main():
    """
    Main function to run the HN story categorizer.

    This function orchestrates the entire pipeline:
    1. Fetch stories from HN API
    2. Set up LangChain components
    3. Process stories through the LLM chain
    4. Display results

    Any exception raised while fetching, running the chain, or parsing a
    reply is not caught here and propagates to the caller.
    """
    # Show a welcome message
    console.print(Panel.fit(
        "Fetching and analyzing Hacker News stories...",
        style="bold green"
    ))

    # Fetch stories
    stories = fetch_hn_stories(limit=5)

    # Setup LangChain components
    chain, output_parser = setup_langchain_categorizer()

    # The format instructions depend only on the parser, so compute them once
    # instead of once per story.
    format_instructions = output_parser.get_format_instructions()

    # Process each story
    categorized_results = []
    with console.status("[bold green]Processing stories..."):
        for story in stories:
            # Run the chain
            raw_output = chain.run(
                title=story['title'],
                url=story['url'],
                format_instructions=format_instructions
            )

            # Parse the reply into our Pydantic model
            categorized_results.append(output_parser.parse(raw_output))

    # Display results
    display_results(stories, categorized_results)
178
# Run the pipeline only when this file is executed as a script, so that
# importing the module does not trigger it.
if __name__ == "__main__":
    main()
requirements.txt Raw
1langchain==0.0.340
2openai==1.3.7
3python-dotenv==1.0.0
4requests==2.31.0
5rich==13.7.0
6beautifulsoup4==4.12.2