openhands revised this gist. Go to revision
2 files changed, 189 insertions
hn_categorizer.py(file created)
| @@ -0,0 +1,183 @@ | |||
| 1 | + | """ | |
| 2 | + | LangChain Hacker News Story Categorizer Tutorial | |
| 3 | + | ||
| 4 | + | This script demonstrates key LangChain concepts by building a simple application | |
| 5 | + | that fetches Hacker News stories and categorizes them using OpenAI's GPT model. | |
| 6 | + | ||
| 7 | + | Key LangChain Concepts Demonstrated: | |
| 8 | + | 1. Chains: Sequences of operations that can be combined | |
| 9 | + | 2. Prompts: Structured way to interact with LLMs | |
| 10 | + | 3. LLMs: Language Model integration | |
| 11 | + | 4. Pydantic Output Parsing: Type-safe structured output handling | |
| 12 | + | """ | |
| 13 | + | ||
| 14 | + | import os | |
| 15 | + | import requests | |
| 16 | + | from typing import List, Dict | |
| 17 | + | from rich.console import Console | |
| 18 | + | from rich.panel import Panel | |
| 19 | + | from rich.table import Table | |
| 20 | + | from rich import print as rprint | |
| 21 | + | from pydantic import BaseModel, Field | |
| 22 | + | ||
| 23 | + | from langchain.chains import LLMChain | |
| 24 | + | from langchain.chat_models import ChatOpenAI | |
| 25 | + | from langchain.prompts import ChatPromptTemplate | |
| 26 | + | from langchain.output_parsers import PydanticOutputParser | |
| 27 | + | ||
# Initialize Rich console for pretty output
console = Console()

# OpenAI API key: respect a key already present in the environment (e.g. set
# via a shell export or python-dotenv) and only fall back to the placeholder.
# SECURITY NOTE: never commit a real API key in source code -- load it from
# the environment instead. Replace the placeholder only for local testing.
os.environ.setdefault("OPENAI_API_KEY", "YOUR-OPENAI-API-KEY")
| 33 | + | ||
# Define our Pydantic model for structured output
class StoryAnalysis(BaseModel):
    """
    Pydantic model for story analysis output.

    Using Pydantic provides type safety and validation: the
    PydanticOutputParser derives its format instructions from this schema
    and validates the LLM's reply against it when parsing.
    """
    # Broad topic bucket chosen by the model, e.g. "Tech" or "Science".
    category: str = Field(description="The main category of the story (Tech, Business, Science, etc.)")
    # Finer-grained label within the chosen category.
    subcategory: str = Field(description="A more specific subcategory")
    # Short natural-language summary produced by the model.
    summary: str = Field(description="A brief 1-2 sentence summary of the story's main points")
| 43 | + | ||
def fetch_hn_stories(limit: int = 5) -> List[Dict]:
    """
    Fetch the current top stories from the Hacker News Firebase API.

    This function demonstrates basic API interaction outside of LangChain.
    Later, we'll combine this with LangChain components.

    Args:
        limit: Maximum number of stories to fetch (default 5).

    Returns:
        A list of dicts with keys 'title', 'url' and 'score'. Items without
        a title are skipped; 'url' defaults to "" and 'score' to 0 when
        absent (e.g. Ask HN posts have no external URL).

    Raises:
        requests.RequestException: if the top-stories request itself fails.
    """
    # Get top story IDs. A timeout keeps the script from hanging forever on
    # network problems, and raise_for_status surfaces HTTP errors early.
    response = requests.get(
        "https://hacker-news.firebaseio.com/v0/topstories.json", timeout=10
    )
    response.raise_for_status()
    story_ids = response.json()[:limit]

    stories = []
    for story_id in story_ids:
        # Fetch individual story details (one request per story).
        story_url = f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json"
        try:
            story_response = requests.get(story_url, timeout=10)
            story_response.raise_for_status()
            story_data = story_response.json()
        except requests.RequestException:
            # Best-effort: skip a story that fails to load rather than
            # aborting the entire run.
            continue

        if story_data and 'title' in story_data:
            stories.append({
                'title': story_data['title'],
                'url': story_data.get('url', ''),
                'score': story_data.get('score', 0)
            })

    return stories
| 70 | + | ||
def setup_langchain_categorizer():
    """
    Build the LangChain pieces used to categorize a story.

    Demonstrates three key LangChain concepts:
    1. PydanticOutputParser -- type-safe structured output
    2. ChatPromptTemplate   -- reusable, parameterized prompts
    3. LLMChain             -- glue that runs a prompt through a model

    Returns:
        A (chain, output_parser) tuple: run the chain to get raw text from
        the LLM, then feed that text to the parser for a StoryAnalysis.
    """
    # The parser derives JSON format instructions from the StoryAnalysis
    # schema and later validates the model's reply against it.
    parser = PydanticOutputParser(pydantic_object=StoryAnalysis)

    # Reusable prompt with three template variables: {title}, {url} and
    # {format_instructions} (filled in at run time).
    template = ChatPromptTemplate.from_template("""
    Analyze the following Hacker News story and provide a categorization and summary.

    Story Title: {title}
    URL: {url}

    {format_instructions}

    Provide your analysis in the exact format specified above:
    """)

    # Deterministic output (temperature=0) makes structured parsing reliable.
    model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

    # LLMChain wires the prompt template to the chat model.
    return LLMChain(llm=model, prompt=template), parser
| 107 | + | ||
def display_results(stories: List[Dict], categorized_results: List[StoryAnalysis]):
    """
    Render the analysis results as a Rich table.

    The categorized_results are strongly typed thanks to the Pydantic model,
    so fields are read with dot notation rather than dict indexing.

    Args:
        stories: Raw story dicts from fetch_hn_stories (only 'title' is used).
        categorized_results: One StoryAnalysis per story, in the same order.
    """
    results_table = Table(
        title="Hacker News Stories Analysis",
        show_header=True,
        header_style="bold magenta",
    )
    # (header, style, no_wrap) per column; titles and summaries may wrap.
    column_specs = [
        ("Title", "cyan", False),
        ("Category", "green", True),
        ("Subcategory", "yellow", True),
        ("Summary", "white", False),
    ]
    for header, style, fixed in column_specs:
        results_table.add_column(header, style=style, no_wrap=fixed)

    # Pair each raw story with its analysis row by row.
    for story, analysis in zip(stories, categorized_results):
        results_table.add_row(
            story['title'],
            analysis.category,
            analysis.subcategory,
            analysis.summary,
        )

    console.print()
    console.print(Panel.fit(
        "🚀 LangChain Hacker News Analyzer",
        subtitle="Analyzing top stories using LangChain and GPT-3.5",
        style="bold blue",
    ))
    console.print()
    console.print(results_table)
    console.print()
| 138 | + | ||
def main():
    """
    Run the HN story categorizer end to end.

    Pipeline:
    1. Fetch stories from the HN API
    2. Set up LangChain components
    3. Process each story through the LLM chain
    4. Display results
    """
    # Show a welcome message.
    console.print(Panel.fit(
        "Fetching and analyzing Hacker News stories...",
        style="bold green"
    ))

    # Fetch stories.
    stories = fetch_hn_stories(limit=5)

    # Setup LangChain components.
    chain, output_parser = setup_langchain_categorizer()

    # The format instructions depend only on the StoryAnalysis schema, so
    # compute them once instead of once per story inside the loop.
    format_instructions = output_parser.get_format_instructions()

    # Process each story (spinner shown while the LLM calls run).
    categorized_results = []
    with console.status("[bold green]Processing stories..."):
        for story in stories:
            # Run the chain: fills the prompt template and calls the LLM.
            result = chain.run(
                title=story['title'],
                url=story['url'],
                format_instructions=format_instructions
            )

            # Validate and convert the raw LLM text into a StoryAnalysis.
            categorized_results.append(output_parser.parse(result))

    # Display results.
    display_results(stories, categorized_results)
| 181 | + | ||
# Standard entry-point guard: run the pipeline only when this file is
# executed as a script, not when it is imported as a module.
if __name__ == "__main__":
    main()
requirements.txt(file created)
| @@ -0,0 +1,6 @@ | |||
| 1 | + | langchain==0.0.340 | |
| 2 | + | openai==1.3.7 | |
| 3 | + | python-dotenv==1.0.0 | |
| 4 | + | requests==2.31.0 | |
| 5 | + | rich==13.7.0 | |
| 6 | + | beautifulsoup4==4.12.2 | |
Newer
Older