Chapter 11: Data Collections 数据集合
Objectives 学习目标
- To understand the use of lists (arrays) to represent a collection of related data 理解使用列表(数组)表示相关数据集合
- To be familiar with the functions and methods available for manipulating Python lists 熟悉操作Python列表的函数和方法
- To be able to write programs that use lists to manage a collection of information 能够编写使用列表管理信息集合的程序
- To be able to write programs that use lists and classes to structure complex data 能够编写使用列表和类来构建复杂数据结构的程序
Example Problem: Simple Statistics 示例问题:简单统计
Statistics Problem 统计问题
# 扩展程序以计算均值、中位数和标准差
def statistics_with_lists():
    """Read numbers from the user and print mean, median and standard deviation.

    Prints a notice and returns early when no numbers were entered.
    """
    values = get_numbers()
    if not values:
        print("没有输入数字")
        return

    # All three statistics are computed before anything is printed.
    average = mean(values)
    summary = (
        ("均值", average),
        ("中位数", median(values)),
        ("标准差", std_dev(values, average)),
    )

    print(f"数字列表: {values}")
    for label, figure in summary:
        print(f"{label}: {figure:.2f}")
def get_numbers():
    """Read floats from standard input, one per line, until a blank line.

    Lines that cannot be parsed as a float are reported and skipped.
    End of input (EOF, e.g. piped input without a trailing blank line) is
    treated like a blank line instead of crashing with EOFError.

    Returns:
        list[float]: The numbers entered, in input order.
    """
    numbers = []
    print("输入数字(空行结束):")
    while True:
        try:
            entry = input()
        except EOFError:
            # Input stream closed: stop reading and keep what we have.
            break
        if entry == "":
            break
        try:
            numbers.append(float(entry))
        except ValueError:
            print("请输入有效数字")
    return numbers
def mean(nums):
    """Return the arithmetic mean of ``nums``, or 0 when ``nums`` is empty."""
    if not nums:
        return 0
    total = sum(nums)
    return total / len(nums)
def median(nums):
    """Return the median of ``nums`` without modifying the input.

    For an odd count this is the middle value of the sorted data; for an
    even count it is the average of the two middle values.

    An empty input returns 0, matching how ``mean`` and ``std_dev`` treat
    degenerate input (previously this raised IndexError).
    """
    if not nums:
        return 0
    sorted_nums = sorted(nums)
    n = len(sorted_nums)
    mid = n // 2
    if n % 2 == 1:
        # Odd count: a single middle element.
        return sorted_nums[mid]
    # Even count: average the two elements around the centre.
    return (sorted_nums[mid - 1] + sorted_nums[mid]) / 2
def std_dev(nums, mean_val):
    """Return the sample standard deviation of ``nums`` around ``mean_val``.

    Uses the ``n - 1`` denominator and returns 0 when there are fewer than
    two values.
    """
    count = len(nums)
    if count < 2:
        return 0
    sum_of_squares = sum((value - mean_val) ** 2 for value in nums)
    return (sum_of_squares / (count - 1)) ** 0.5
# statistics_with_lists()
Lists as Sequences 列表作为序列
List Basics 列表基础
def list_basics():
    """Demonstrate creating lists and their dynamic, heterogeneous, mutable nature."""
    # A few sample lists, including an empty one and one with mixed types.
    nothing = []
    digits = [1, 2, 3, 4, 5]
    assorted = [1, "hello", 3.14, True]

    # The same student record, once flat and once with a nested sub-list.
    identity = [2020310912, 'Xiaoming', 'Male', 'China']
    enrolment = ['Information School', 'Computer Science', 20020501]
    flat_record = identity + enrolment
    nested_record = identity + [enrolment]

    samples = (
        ("空列表", nothing),
        ("数字列表", digits),
        ("混合列表", assorted),
        ("学生信息", flat_record),
        ("嵌套学生信息", nested_record),
    )
    for caption, sample in samples:
        print(f"{caption}: {sample}")

    # Characteristics of lists.
    print("\n列表特性:")
    print(f"动态性 - 可添加元素: {digits}")
    digits.append(6)
    print(f"添加后: {digits}")
    print(f"异构性 - 包含不同类型: {assorted}")
    print("可变性 - 可修改元素")
    digits[0] = 100
    print(f"修改后: {digits}")
# list_basics()
List Operations 列表操作
Adding Elements 添加元素
def list_adding_operations():
    """Show the common ways to add items to a list: append, extend, insert and +."""
    basket = ['apple', 'banana']
    print(f"原始列表: {basket}")

    # append: add a single element at the end.
    basket.append('cherry')
    print(f"append后: {basket}")

    # extend: add every element of another list.
    basket.extend(['date', 'elderberry'])
    print(f"extend后: {basket}")

    # insert: place an element before the given index.
    basket.insert(1, 'apricot')
    print(f"insert后: {basket}")

    # +: build a new list and leave the original untouched.
    extras = ['fig', 'grape']
    print(f"使用+合并: {basket + extras}")
# list_adding_operations()
Removing Elements 删除元素
def list_removing_operations():
    """Show the ways to remove items from a list: pop, remove, del and clear."""
    values = [10, 20, 30, 40, 50, 30, 20]
    print(f"原始列表: {values}")

    # pop() with no argument removes and returns the last element.
    last = values.pop()
    print(f"pop()后: {values}, 删除的元素: {last}")

    # pop(i) removes and returns the element at index i.
    second = values.pop(1)
    print(f"pop(1)后: {values}, 删除的元素: {second}")

    # remove deletes only the first element equal to its argument.
    values.remove(30)
    print(f"remove(30)后: {values}")

    # del drops the element at an index without returning it.
    del values[0]
    print(f"del numbers[0]后: {values}")

    # clear empties the list in place.
    values.clear()
    print(f"clear后: {values}")
# list_removing_operations()
Searching and Modifying 搜索和修改
def list_search_modify_operations():
    """Show searching (index, count, in) and in-place modification of a list."""
    palette = ['red', 'blue', 'green', 'yellow', 'blue', 'purple']
    print(f"原始列表: {palette}")

    # index returns the position of the first match.
    print(f"'blue'的位置: {palette.index('blue')}")

    # count returns how many elements equal the argument.
    print(f"'blue'出现次数: {palette.count('blue')}")

    # Assigning to an index replaces that element.
    palette[1] = 'navy blue'
    print(f"修改后: {palette}")

    # The in operator tests membership.
    red_present = 'red' in palette
    print(f"'red'是否存在: {red_present}")
# list_search_modify_operations()
List Sorting and Reversing 列表排序和反转
Sorting Operations 排序操作
def list_sorting_operations():
    """Show in-place sorting with list.sort and copying with sorted()."""
    figures = [5, 2, 8, 1, 9]
    terms = ['banana', 'apple', 'cherry', 'date']
    cased = ['Apple', 'banana', 'Cherry', 'date']
    print(f"原始数字: {figures}")
    print(f"原始单词: {terms}")
    print(f"混合大小写: {cased}")

    # list.sort reorders the list itself.
    figures.sort()
    print(f"数字sort后: {figures}")
    terms.sort()
    print(f"单词sort后: {terms}")

    # reverse=True sorts in descending order.
    figures.sort(reverse=True)
    print(f"数字降序: {figures}")

    # key= selects what each element is compared by.
    terms.sort(key=len)
    print(f"按长度排序: {terms}")
    cased.sort(key=str.lower)
    print(f"忽略大小写排序: {cased}")

    # sorted() returns a new list and leaves its argument unchanged.
    source = [5, 2, 8, 1, 9]
    ordered = sorted(source)
    print(f"原列表: {source}")
    print(f"sorted结果: {ordered}")
# list_sorting_operations()
Reversing Operations 反转操作
def list_reversing_operations():
    """Show in-place reversal with list.reverse and the reversed() iterator."""
    sequence = [1, 2, 3, 4, 5]
    print(f"原始列表: {sequence}")

    # list.reverse flips the list itself.
    sequence.reverse()
    print(f"reverse后: {sequence}")

    # reversed() yields the elements back to front without touching the source.
    source = [1, 2, 3, 4, 5]
    backwards = list(reversed(source))
    print(f"原列表: {source}")
    print(f"reversed结果: {backwards}")
# list_reversing_operations()
List Functions 列表函数
Built-in Functions 内置函数
def list_functions_demo():
    """Show built-in functions that work on lists: len, min, max, sum, list, enumerate."""
    readings = [10, 25, 5, 40, 15]
    print(f"列表: {readings}")
    for caption, result in (
        ("长度", len(readings)),
        ("最小值", min(readings)),
        ("最大值", max(readings)),
        ("总和", sum(readings)),
    ):
        print(f"{caption}: {result}")

    # list() builds a list from any iterable.
    letters = list("hello")
    print(f"字符串转列表: {letters}")
    counting = list(range(5))
    print(f"range转列表: {counting}")

    # enumerate pairs each element with a running index (starting at 1 here).
    print("enumerate示例:")
    for position, name in enumerate(['apple', 'banana', 'cherry'], start=1):
        print(f" {position}. {name}")
# list_functions_demo()
Map and Zip Functions map和zip函数
def map_zip_functions():
    """Show map for element-wise transformation and zip for combining sequences."""
    base = [1, 2, 3, 4, 5]

    # map applies the function to every element.
    powers = list(map(lambda value: value ** 2, base))
    print(f"数字: {base}")
    print(f"平方: {powers}")

    # zip combines parallel sequences into tuples.
    people = ['Alice', 'Bob', 'Charlie']
    marks = [85, 92, 78]
    years = [20, 22, 21]
    records = list(zip(people, marks, years))
    print(f"合并结果: {records}")

    # zip(*records) splits the tuples back into one tuple per field.
    columns = tuple(zip(*records))
    for caption, column in zip(("解压姓名", "解压分数", "解压年龄"), columns):
        print(f"{caption}: {column}")
# map_zip_functions()
List Comprehensions 列表推导式
Ex. 补充:列表推导式 (List Comprehensions)
Basic Comprehensions 基础推导式
def list_comprehensions_basics():
    """Show basic list comprehensions, with and without a filter condition."""
    # Plain comprehension.
    print(f"1-5的平方: {[n ** 2 for n in range(1, 6)]}")

    # Comprehension with a condition.
    print(f"偶数的平方: {[n ** 2 for n in range(1, 11) if n % 2 == 0]}")

    # String processing.
    vocabulary = ['hello', 'world', 'python', 'programming']
    shouted = [term.upper() for term in vocabulary]
    print(f"大写单词: {shouted}")
    long_shouted = [term.upper() for term in vocabulary if len(term) > 5]
    print(f"长度>5的大写单词: {long_shouted}")
# list_comprehensions_basics()
Advanced Comprehensions 高级推导式
def advanced_comprehensions():
    """Show comprehensions for narcissistic numbers, transposing and flattening."""
    # Narcissistic numbers: three-digit numbers equal to the sum of the
    # cubes of their digits.
    def digit_cube_sum(number):
        return sum(int(ch) ** 3 for ch in str(number))

    narcissistic = [n for n in range(100, 1000) if digit_cube_sum(n) == n]
    print(f"水仙花数: {narcissistic}")

    # Matrix transpose: column i of the input becomes row i of the output.
    grid = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    width = len(grid[0])
    flipped = [[line[col] for line in grid] for col in range(width)]
    print(f"原始矩阵: {grid}")
    print(f"转置矩阵: {flipped}")

    # Flatten a list of lists, keeping the elements in order.
    ragged = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]
    flattened = [element for chunk in ragged for element in chunk]
    print(f"嵌套列表: {ragged}")
    print(f"扁平化列表: {flattened}")
# advanced_comprehensions()
Monte Carlo Simulation 蒙特卡洛模拟
Calculating π with Monte Carlo 用蒙特卡洛方法计算π
import random
def monte_carlo_pi(num_points=100000):
    """Estimate pi by sampling random points in a square.

    Points are drawn uniformly from the square [-1, 1] x [-1, 1]; the share
    that lands inside the unit circle approximates pi / 4.

    Args:
        num_points: Number of random points to draw.

    Returns:
        The estimate ``4 * inside / num_points``.
    """
    # For each sample the x coordinate is drawn first, then the y coordinate.
    inside = sum(
        1
        for _ in range(num_points)
        if random.uniform(-1, 1) ** 2 + random.uniform(-1, 1) ** 2 <= 1
    )
    return 4 * inside / num_points
def monte_carlo_comprehension(num_points=100000):
    """Estimate pi like ``monte_carlo_pi`` but materialise all points first.

    Every point is generated up front (x before y for each point); the
    points inside the unit circle are counted afterwards.
    """
    samples = []
    for _ in range(num_points):
        px = random.uniform(-1, 1)
        py = random.uniform(-1, 1)
        samples.append((px, py))

    hits = len([None for px, py in samples if px ** 2 + py ** 2 <= 1])
    return 4 * hits / num_points
def run_monte_carlo():
    """Run both Monte Carlo estimators and print them next to math.pi."""
    import math

    sample_size = 100000
    loop_estimate = monte_carlo_pi(sample_size)
    list_estimate = monte_carlo_comprehension(sample_size)
    loop_error = abs(loop_estimate - math.pi)
    list_error = abs(list_estimate - math.pi)

    print("蒙特卡洛方法估算π值:")
    print(f"方法1: {loop_estimate:.6f}")
    print(f"方法2: {list_estimate:.6f}")
    print(f"真实π值: {math.pi:.6f}")
    print(f"方法1误差: {loop_error:.6f}")
    print(f"方法2误差: {list_error:.6f}")
# run_monte_carlo()
Lambda Functions Lambda函数
Lambda Basics Lambda基础
def lambda_functions_demo():
    """Show lambda expressions on their own and as arguments to map, filter, sorted."""
    # A lambda bound to a name behaves like a small function.
    to_square = lambda value: value ** 2
    print(f"5的平方: {to_square(5)}")

    # Lambdas may take several parameters.
    product = lambda left, right: left * right
    print(f"3 * 4 = {product(3, 4)}")

    # Lambda as the function passed to map.
    digits = [1, 2, 3, 4, 5]
    print(f"数字平方: {list(map(lambda value: value ** 2, digits))}")

    # Lambda as the predicate passed to filter.
    print(f"偶数: {list(filter(lambda value: value % 2 == 0, digits))}")

    # Lambda as the sort key. Each record is [name, student id, grade].
    roster = [
        ['Angle', '0121701100106', 99],
        ['Jack', '0121701100107', 86],
        ['Tom', '0121701100109', 65],
        ['Smith', '0121701100111', 100],
        ['Bob', '0121701100115', 77],
        ['Lily', '0121701100117', 59],
    ]

    # Sorted by name.
    print("按姓名排序:")
    for record in sorted(roster, key=lambda row: row[0]):
        print(f" {record}")

    # Sorted by student id.
    print("\n按学号排序:")
    for record in sorted(roster, key=lambda row: row[1]):
        print(f" {record}")

    # Sorted by grade, highest first.
    print("\n按成绩排序(降序):")
    for name, _student_id, grade in sorted(
        roster, key=lambda row: row[2], reverse=True
    ):
        print(f" {name}: {grade}")
# lambda_functions_demo()
Lists of Objects 对象列表
Student Records Processing 学生记录处理
class Student:
    """A student record: name, credit hours and quality points."""

    def __init__(self, name, hours, qpoints):
        """Store the name and convert hours and quality points to float."""
        self.name = name
        self.hours, self.qpoints = float(hours), float(qpoints)

    def getName(self):
        """Return the student's name."""
        return self.name

    def getHours(self):
        """Return the credit hours as a float."""
        return self.hours

    def getQPoints(self):
        """Return the quality points as a float."""
        return self.qpoints

    def gpa(self):
        """Return quality points per hour, or 0.0 when hours is not positive."""
        if self.hours > 0:
            return self.qpoints / self.hours
        return 0.0

    def __str__(self):
        """Return a one-line summary with the hours and the GPA to three decimals."""
        return f"{self.name}: {self.hours}学时, GPA={self.gpa():.3f}"
def process_student_records():
    """Build a list of Student objects, rank them by GPA and print summary figures."""
    # (name, hours, quality points) for each sample student.
    raw_records = [
        ("Adams, Henry", 127, 228),
        ("Comptewell, Susan", 100, 400),
        ("DibbleBit, Denny", 18, 41.5),
        ("Jones, Jim", 48.5, 155),
        ("Smith, Frank", 37, 125.33),
    ]
    roster = [Student(*record) for record in raw_records]

    print("原始学生列表:")
    for person in roster:
        print(f" {person}")

    # Ranked by GPA, highest first.
    print("\n按GPA排序:")
    for person in sorted(roster, key=Student.gpa, reverse=True):
        print(f" {person}")

    # The single best student by GPA.
    top = max(roster, key=Student.gpa)
    print(f"\nGPA最高的学生: {top}")

    # Aggregate figures.
    hours_total = sum(person.getHours() for person in roster)
    gpa_average = sum(person.gpa() for person in roster) / len(roster)
    print("\n统计信息:")
    print(f"总学时: {hours_total}")
    print(f"平均GPA: {gpa_average:.3f}")
# process_student_records()
Designing with Lists and Classes 使用列表和类进行设计
Improved DieView Class 改进的DieView类
from graphics import *
class DieView:
    """Graphical view of a six-sided die whose pips are managed in a list.

    Nine candidate pips are laid out on a 3x3 grid; ``on_table`` records,
    for each face value, which of them are shown.
    """

    def __init__(self, win, center, size):
        """Draw the die background in ``win`` and show the value 1."""
        self.win = win
        self.center = center
        self.size = size
        self.value = 1

        # White square with a black outline, centred on ``center``.
        reach = size / 2
        cx, cy = center.getX(), center.getY()
        top_left = Point(cx - reach, cy - reach)
        bottom_right = Point(cx + reach, cy + reach)
        self.background = Rectangle(top_left, bottom_right)
        self.background.setFill('white')
        self.background.setOutline('black')
        self.background.draw(win)

        # All nine pips live in one list, indexed row by row from the top-left.
        self.pips = []
        self.__create_pips()

        # on_table[value] lists the pip indexes shown for that face value.
        self.on_table = [
            [],                  # 0 (unused)
            [4],                 # 1: centre
            [0, 8],              # 2: top-left, bottom-right
            [0, 4, 8],           # 3: top-left, centre, bottom-right
            [0, 2, 6, 8],        # 4: four corners
            [0, 2, 4, 6, 8],     # 5: four corners plus centre
            [0, 2, 3, 5, 6, 8],  # 6: two columns
        ]
        self.setValue(1)

    def __create_pips(self):
        """Create the nine pip circles (not yet drawn) and append them to ``self.pips``."""
        radius = self.size * 0.1
        # Grid offsets, top row first and left to right within each row.
        offsets = [(dx, dy) for dy in (-1, 0, 1) for dx in (-1, 0, 1)]
        for dx, dy in offsets:
            px = self.center.getX() + dx * self.size * 0.3
            py = self.center.getY() + dy * self.size * 0.3
            dot = Circle(Point(px, py), radius)
            dot.setFill('black')
            self.pips.append(dot)

    def setValue(self, value):
        """Show ``value`` on the die; values outside 1..6 are ignored."""
        if not 1 <= value <= 6:
            return
        self.value = value
        self.__update_display()

    def __update_display(self):
        """Undraw every pip, then draw the ones belonging to the current value."""
        for dot in self.pips:
            dot.undraw()
        for index in self.on_table[self.value]:
            self.pips[index].draw(self.win)

    def getValue(self):
        """Return the value currently shown."""
        return self.value
# 优势:代码更简洁,易于维护和扩展
Case Study: Python Calculator 案例研究:Python计算器
Calculator Class 计算器类
from graphics import *
class Calculator:
    """Graphical calculator built from a grid of buttons and a text display.

    Relies on a ``Button`` class (with ``activate``, ``clicked`` and
    ``getLabel``) that is not defined in this section.
    """

    def __init__(self):
        """Create the window, the button grid and the display."""
        self.win = GraphWin("Calculator", 300, 400)
        self.win.setBackground("lightgray")

        # Button layout as (column, row, label).
        button_specs = [
            (1, 1, '7'), (2, 1, '8'), (3, 1, '9'), (4, 1, '/'),
            (1, 2, '4'), (2, 2, '5'), (3, 2, '6'), (4, 2, '*'),
            (1, 3, '1'), (2, 3, '2'), (3, 3, '3'), (4, 3, '-'),
            (1, 4, '0'), (2, 4, '.'), (3, 4, '='), (4, 4, '+'),
            (1, 5, 'C'), (2, 5, '<-'), (3, 5, '('), (4, 5, ')')
        ]

        # Build and activate one Button per spec.
        self.buttons = []
        for col, row, label in button_specs:
            center = Point(col * 60 + 30, row * 60 + 60)
            button = Button(self.win, center, 50, 50, label)
            button.activate()
            self.buttons.append(button)

        # Display area: white background with a text object on top.
        self.display_bg = Rectangle(Point(30, 10), Point(270, 50))
        self.display_bg.setFill('white')
        self.display_bg.draw(self.win)
        self.display_text = Text(Point(150, 30), "")
        self.display_text.draw(self.win)
        self.current_input = ""

    def get_button(self):
        """Block until a button is clicked and return that button's label."""
        while True:
            click = self.win.getMouse()
            for button in self.buttons:
                if button.clicked(click):
                    return button.getLabel()

    def process_button(self, label):
        """Apply one button press to the current input and refresh the display.

        'C' clears, '<-' deletes the last character, '=' evaluates the
        expression, and any other label is appended. A failed evaluation
        shows "ERROR"; the next key press (other than 'C') starts from an
        empty input instead of appending to the error text.
        """
        if self.current_input == "ERROR" and label != 'C':
            # Do not let new keys build on the error marker (e.g. "ERROR5").
            self.current_input = ""

        if label == 'C':
            self.current_input = ""
        elif label == '<-':
            self.current_input = self.current_input[:-1]
        elif label == '=':
            try:
                # NOTE(security): eval executes arbitrary Python. It is only
                # acceptable here because the text is assembled from the
                # fixed button labels; never feed it external input.
                result = eval(self.current_input)
                self.current_input = str(result)
            except Exception:
                # Malformed or failing expression (syntax error, division by
                # zero, ...). KeyboardInterrupt/SystemExit still propagate.
                self.current_input = "ERROR"
        else:
            self.current_input += label

        self.display_text.setText(self.current_input)

    def run(self):
        """Process button clicks until 'C' is pressed on an empty input."""
        while True:
            button_label = self.get_button()
            if button_label == 'C' and self.current_input == "":
                break  # 'C' on an already-empty display quits
            self.process_button(button_label)
        self.win.close()
# 需要Button类的实现
# calc = Calculator()
# calc.run()
Case Study: Better Cannonball Animation 案例研究:更好的炮弹动画
Projectile Application with Multiple Shots 多炮弹的抛射物应用
from graphics import *
from projectile import Projectile
import time
class ShotTracker:
    """Couples a Projectile with the circle that marks it in the window.

    The marker is positioned directly at the projectile's (x, y), so the
    window is expected to use a y-up world coordinate system such as the
    one ProjectileApp sets up with ``setCoords``. The previous version
    subtracted y from ``win.getHeight()``, which placed the marker far
    outside the visible area under those coordinates.
    """

    def __init__(self, win, angle, velocity, height):
        """Create the projectile and draw its marker at the start position."""
        self.win = win
        self.proj = Projectile(angle, velocity, height)

        # Red circle marking the shot.
        self.marker = Circle(Point(0, 0), 3)
        self.marker.setFill("red")
        self.marker.setOutline("red")
        self.marker.draw(win)
        # A zero-length step moves the marker onto the start position.
        self.update(0)

    def update(self, dt):
        """Advance the projectile by ``dt`` and move the marker to match."""
        self.proj.update(dt)
        center = self.marker.getCenter()
        # The window's coordinate transform handles orientation; no manual
        # flip of the y axis here.
        dx = self.proj.getX() - center.getX()
        dy = self.proj.getY() - center.getY()
        self.marker.move(dx, dy)

    def getX(self):
        """Return the projectile's current x position."""
        return self.proj.getX()

    def getY(self):
        """Return the projectile's current y position."""
        return self.proj.getY()

    def is_alive(self):
        """Return True while the shot is at or above the ground and in range.

        NOTE(review): the x limit is ``win.getWidth()``; confirm whether a
        world-coordinate limit is intended when the window uses setCoords.
        """
        return (0 <= self.getX() <= self.win.getWidth() and
                self.getY() >= 0)

    def undraw(self):
        """Remove the marker from the window."""
        self.marker.undraw()
class Launcher:
    """Adjustable launcher drawn as an arrow starting at the origin.

    The arrow's direction shows the launch angle and its length the launch
    velocity. It is drawn directly in the window's coordinates, so the
    window is expected to use a y-up world coordinate system such as the
    one ProjectileApp sets up with ``setCoords``.
    """

    def __init__(self, win):
        """Create a launcher at 45 degrees and velocity 40, and draw it."""
        self.win = win
        self.angle = 45.0
        self.vel = 40.0
        self.arrow = None
        self.redraw()

    def adj_angle(self, amt):
        """Change the angle by ``amt`` degrees, clamped to the range 0..90."""
        self.angle += amt
        self.angle = max(0, min(90, self.angle))
        self.redraw()

    def adj_vel(self, amt):
        """Change the velocity by ``amt``, never letting it go below 0."""
        self.vel += amt
        self.vel = max(0, self.vel)
        self.redraw()

    def redraw(self):
        """Replace the arrow with one matching the current angle and velocity."""
        # Imported here because this method needs ``math`` and the class
        # should not rely on it being imported elsewhere.
        import math

        if self.arrow:
            self.arrow.undraw()

        # Arrow tip in the same coordinates the projectile uses.
        rad = math.radians(self.angle)
        tip = Point(self.vel * math.cos(rad), self.vel * math.sin(rad))
        self.arrow = Line(Point(0, 0), tip)
        self.arrow.setArrow("last")
        self.arrow.setWidth(3)
        self.arrow.draw(self.win)

    def fire(self):
        """Return a new ShotTracker launched from height 0 with the current settings."""
        return ShotTracker(self.win, self.angle, self.vel, 0)
class ProjectileApp:
    """Main application: a launcher plus any number of shots in flight."""

    def __init__(self):
        """Open the window, draw the ground and create the launcher."""
        window = GraphWin("Cannonball Animation", 640, 480, autoflush=False)
        window.setCoords(-10, -10, 210, 155)
        window.setBackground("lightblue")
        self.win = window

        # Ground line along y = 0.
        ground = Line(Point(-10, 0), Point(210, 0))
        ground.setWidth(3)
        ground.draw(window)

        # The launcher and the list of shots currently in flight.
        self.launcher = Launcher(window)
        self.shots = []

    def update_shots(self, dt):
        """Advance every shot by ``dt`` and drop the ones no longer alive."""
        survivors = []
        for shot in self.shots:
            shot.update(dt)
            if not shot.is_alive():
                shot.undraw()
                continue
            survivors.append(shot)
        self.shots = survivors

    def run(self):
        """Run the animation loop until 'q' is pressed, then close the window.

        Up/Down change the angle by 5, Right/Left change the velocity by 5,
        and 'f' fires a new shot.
        """
        angle_steps = {"Up": 5, "Down": -5}
        velocity_steps = {"Right": 5, "Left": -5}

        while True:
            self.update_shots(1/30)

            key = self.win.checkKey()
            if key == "q":
                break
            if key in angle_steps:
                self.launcher.adj_angle(angle_steps[key])
            elif key in velocity_steps:
                self.launcher.adj_vel(velocity_steps[key])
            elif key == "f":
                self.shots.append(self.launcher.fire())

            # Limit the loop to about 30 iterations per second.
            update(30)

        self.win.close()
# app = ProjectileApp()
# app.run()
Tuples 元组
Tuple Basics 元组基础
def tuple_operations():
    """Show tuple creation, packing/unpacking, read-only operations and use as dict keys."""
    # Creating tuples; a one-element tuple needs the trailing comma.
    nothing = ()
    lonely = (5,)
    digits = (1, 2, 3, 4, 5)
    assorted = (1, "hello", 3.14, True)
    for caption, sample in (
        ("空元组", nothing),
        ("单元素元组", lonely),
        ("数字元组", digits),
        ("混合元组", assorted),
    ):
        print(f"{caption}: {sample}")

    # Packing happens automatically when values are separated by commas.
    packed = 1, 2, 3
    print(f"自动打包: {packed}")

    # Unpacking assigns each element to its own name.
    first, second, third = packed
    print(f"解包: a={first}, b={second}, c={third}")

    # Read-only sequence operations.
    print(f"长度: {len(digits)}")
    print(f"索引: numbers[1] = {digits[1]}")
    print(f"切片: numbers[1:4] = {digits[1:4]}")
    print(f"计数3出现次数: {digits.count(3)}")
    print(f"3的位置: {digits.index(3)}")

    # Tuples are immutable: item assignment raises TypeError.
    try:
        digits[0] = 10
    except TypeError as problem:
        print(f"错误: {problem}")

    # Tuples can serve as dictionary keys.
    places = {(1, 2): "Point A", (3, 4): "Point B"}
    print(f"坐标字典: {places}")
# tuple_operations()
Non-sequential Collections 非顺序集合
Dictionary Basics 字典基础
def dictionary_basics():
    """Show creating dictionaries, reading, adding/updating entries and views."""
    # Creating dictionaries.
    nothing = {}
    gradebook = {'Alice': 85, 'Bob': 92, 'Charlie': 78}
    profile = {'name': 'John', 'age': 25, 'scores': [85, 92, 78]}
    print(f"空字典: {nothing}")
    print(f"学生成绩: {gradebook}")
    print(f"混合字典: {profile}")

    # Reading: indexing needs the key to exist, get() takes a fallback.
    print(f"Alice的成绩: {gradebook['Alice']}")
    fallback = gradebook.get('David', '未找到')
    print(f"使用get: {fallback}")

    # Assignment adds a new key or overwrites an existing one.
    gradebook['David'] = 88
    gradebook['Alice'] = 90
    print(f"更新后: {gradebook}")

    # Views over keys, values and key/value pairs.
    print(f"键: {list(gradebook.keys())}")
    print(f"值: {list(gradebook.values())}")
    print(f"键值对: {list(gradebook.items())}")

    # Membership tests look at the keys.
    alice_known = 'Alice' in gradebook
    print(f"Alice是否存在: {alice_known}")
# dictionary_basics()
Dictionary Operations 字典操作
def dictionary_operations():
    """Show the ways to build a dictionary and the update, pop and clear operations."""
    # Four equivalent ways to build the same dictionary.
    letters = ['a', 'b', 'c']
    built = [
        {'a': 1, 'b': 2, 'c': 3},                 # literal
        dict(a=1, b=2, c=3),                      # keyword arguments
        dict([('a', 1), ('b', 2), ('c', 3)]),     # sequence of pairs
        dict(zip(letters, [1, 2, 3])),            # zip of keys and values
    ]
    for number, mapping in enumerate(built, start=1):
        print(f"方法{number}: {mapping}")

    # fromkeys gives every key the same value.
    placeholders = dict.fromkeys(['name', 'age', 'city'], 'unknown')
    print(f"fromkeys: {placeholders}")

    # Dictionary comprehension.
    powers = {base: base ** 2 for base in range(1, 6)}
    print(f"平方字典: {powers}")

    # update overwrites existing keys and adds new ones.
    working = {'a': 1, 'b': 2}
    working.update({'b': 20, 'c': 3})
    print(f"更新后: {working}")

    # pop removes a key and returns its value.
    removed = working.pop('a')
    print(f"删除'a': {working}, 返回值: {removed}")

    # clear empties the dictionary in place.
    working.clear()
    print(f"清空后: {working}")
# dictionary_operations()
Set Basics 集合基础
def set_operations():
    """Show creating sets, the set algebra operators, mutating methods and comprehensions."""
    # Creating sets; elements must be hashable, so no mutable elements.
    nothing = set()
    digits = {1, 2, 3, 4, 5}
    assorted = {1, 'hello', 3.14, (1, 2)}
    print(f"空集合: {nothing}")
    print(f"数字集合: {digits}")
    print(f"混合集合: {assorted}")

    # Set algebra.
    left = {1, 2, 3, 4, 5}
    right = {4, 5, 6, 7, 8}
    print(f"集合并集: {left | right}")
    print(f"集合交集: {left & right}")
    print(f"集合差集: {left - right}")
    print(f"集合对称差集: {left ^ right}")

    # Mutating methods.
    left.add(6)
    print(f"添加6后: {left}")
    left.remove(1)
    print(f"删除1后: {left}")
    taken = left.pop()
    print(f"弹出元素: {taken}, 剩余集合: {left}")

    # Set comprehension.
    even_squares = {x**2 for x in range(10) if x**2 % 2 == 0}
    print(f"偶数的平方集合: {even_squares}")
# set_operations()
Example Program: Word Frequency 示例程序:词频统计
Word Frequency Analysis 词频分析
import string
from collections import Counter
def word_frequency_analysis(text, n=10, remove_stopwords=True):
    """Count how often each word occurs in ``text``.

    The text is lower-cased, ASCII punctuation is deleted and the result is
    split on whitespace.

    Args:
        text: The text to analyse.
        n: Accepted but not used by this function.
        remove_stopwords: When true, common English stop words are skipped.

    Returns:
        dict mapping each word to its count, in order of first occurrence.
    """
    # Common English stop words.
    stop_words = {'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at',
                  'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were'}

    # Normalise: lower-case, drop punctuation, split into words.
    drop_punctuation = str.maketrans('', '', string.punctuation)
    tokens = text.lower().translate(drop_punctuation).split()
    if remove_stopwords:
        tokens = [token for token in tokens if token not in stop_words]

    # Tally the remaining words.
    frequencies = {}
    for token in tokens:
        if token in frequencies:
            frequencies[token] += 1
        else:
            frequencies[token] = 1
    return frequencies
def print_word_frequency(word_counts, n=10):
    """Print the ``n`` most frequent words, most frequent first.

    Words with the same count are listed alphabetically.
    """
    # Alphabetical order first, then a stable descending sort on the count
    # keeps ties in alphabetical order.
    ranked = sorted(word_counts.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    print(f"前{n}个最频繁的单词:")
    position = 0
    for word, count in ranked[:n]:
        position += 1
        print(f"{position:2d}. {word:15s} : {count:3d}")
def advanced_word_frequency(text, n=10):
    """Count words with collections.Counter and print the ``n`` most common.

    The text is lower-cased, ASCII punctuation is deleted, and common
    English stop words are removed from the counts.

    Returns:
        The Counter of the remaining words.
    """
    # Normalise the text and count every word.
    drop_punctuation = str.maketrans('', '', string.punctuation)
    cleaned = text.lower().translate(drop_punctuation)
    word_counter = Counter(cleaned.split())

    # Discard stop words from the tally.
    stop_words = {'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at',
                  'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were'}
    for stop_word in stop_words:
        word_counter.pop(stop_word, None)

    # Report the n most common words.
    print("使用Counter的词频分析:")
    rank = 1
    for word, count in word_counter.most_common(n):
        print(f"{rank:2d}. {word:15s} : {count:3d}")
        rank += 1
    return word_counter
def process_multiple_grades():
    """Merge several grade sheets and print each student's scores and average."""
    grade_sheets = [
        {'Sam': 89, 'Anne': 95, 'Lily': 67, 'Jim': 75},
        {'Sam': 75, 'Anne': 79, 'Lily': 79},
        {'Lily': 87, 'Jim': 91, 'Zack': 75},
        {'Sam': 89, 'Anne': 86, 'Zack': 99},
    ]

    # Collect every score per student, in sheet order.
    scores_by_student = {}
    for sheet in grade_sheets:
        for student, score in sheet.items():
            scores_by_student.setdefault(student, []).append(score)

    print("学生成绩统计:")
    for student in sorted(scores_by_student):
        scores = scores_by_student[student]
        # One score is recorded per course, so the course count is the list length.
        course_total = len(scores)
        average = sum(scores) / course_total
        print(f"{student}: {course_total}门课程, "
              f"成绩: {scores}, 平均分: {average:.1f}")
# Sample English text passed to the commented-out example calls below.
sample_text = """
Python is an interpreted, high-level, general-purpose programming language.
Created by Guido van Rossum and first released in 1991, Python's design
philosophy emphasizes code readability with its notable use of significant
whitespace. Its language constructs and object-oriented approach aim to help
programmers write clear, logical code for small and large-scale projects.
"""
# 运行词频分析
# word_counts = word_frequency_analysis(sample_text)
# print_word_frequency(word_counts)
# advanced_word_frequency(sample_text)
# process_multiple_grades()
WordCloud and Chinese Text Processing 词云和中文文本处理
WordCloud Generation 词云生成
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import jieba
def generate_english_wordcloud(text, output_file='wordcloud.png'):
    """Build a word cloud from ``text``, display it, then save it to ``output_file``."""
    # Word cloud settings.
    cloud_options = {
        'width': 800,
        'height': 400,
        'background_color': 'white',
        'max_words': 100,
        'colormap': 'viridis',
    }
    cloud = WordCloud(**cloud_options).generate(text)

    # Display the image without axes.
    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.title('Word Cloud')
    plt.show()

    # Save the image after it has been shown.
    cloud.to_file(output_file)
    print(f"词云已保存为: {output_file}")
def chinese_word_frequency(text):
    """Segment ``text`` with jieba and count every token longer than one character.

    Returns:
        dict mapping each counted token to its number of occurrences.
    """
    tally = {}
    for token in jieba.lcut(text):
        # Skip single-character tokens.
        if len(token) <= 1:
            continue
        tally[token] = tally.get(token, 0) + 1
    return tally
def generate_chinese_wordcloud(text, output_file='chinese_wordcloud.png',
                               font_path='simhei.ttf'):
    """Build a word cloud from Chinese ``text``, display it, then save it.

    The text is segmented with jieba and re-joined with spaces so that
    WordCloud can split it into words.

    Args:
        text: Chinese text to visualise.
        output_file: Path of the image file to write.
        font_path: Font file passed to WordCloud; it needs to contain
            Chinese glyphs. Defaults to 'simhei.ttf' (the previously
            hard-coded value), so existing callers are unaffected, while
            callers on systems without that font can supply another one.
    """
    # Segment the text and join the tokens with spaces.
    words = jieba.lcut(text)
    text_processed = ' '.join(words)

    # Build the word cloud with the chosen font.
    wordcloud = WordCloud(
        font_path=font_path,
        width=800,
        height=400,
        background_color='white',
        max_words=100
    ).generate(text_processed)

    # Display the image without axes.
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.title('中文词云')
    plt.show()

    # Save the image after it has been shown.
    wordcloud.to_file(output_file)
    print(f"中文词云已保存为: {output_file}")
def three_kingdoms_analysis():
    """Segment a short sample passage, print the tokens and the ten most frequent words.

    Returns:
        The word-count dict produced by ``chinese_word_frequency``.
    """
    # Sample passage; a real application would read the text from a file.
    sample_text = """
滚滚长江东逝水,浪花淘尽英雄。是非成败转头空。青山依旧在,几度夕阳红。
白发渔樵江渚上,惯看秋月春风。一壶浊酒喜相逢。古今多少事,都付笑谈中。
"""
    print("中文分词示例:")
    tokens = jieba.lcut(sample_text)
    print(f"分词结果: {tokens}")

    # Word counts, most frequent first; ties keep their original order.
    word_counts = chinese_word_frequency(sample_text)
    ranked = sorted(word_counts.items(), key=lambda pair: pair[1], reverse=True)

    print("\n词频统计:")
    for word, count in ranked[:10]:
        print(f"{word}: {count}次")
    return word_counts
# 需要安装wordcloud, jieba, matplotlib
# generate_english_wordcloud(sample_text)
# three_kingdoms_analysis()
Summary 总结
Key Concepts 关键概念
- Lists 列表
- Dynamic, heterogeneous, mutable sequences 动态、异构、可变序列
- Comprehensive methods for manipulation 全面的操作方法
- List comprehensions for concise creation 列表推导式用于简洁创建
- List Operations 列表操作
- Adding: append, extend, insert 添加元素
- Removing: pop, remove, del, clear 删除元素
- Searching: index, count, in operator 搜索元素
- Sorting: sort, sorted, reverse 排序和反转
- Functional Programming 函数式编程
- Lambda functions for anonymous operations Lambda函数用于匿名操作
- map, filter, zip for data transformation map、filter、zip用于数据转换
- List comprehensions as alternative 列表推导式作为替代
- Data Collections 数据集合
- Tuples: immutable sequences 元组:不可变序列
- Dictionaries: key-value mappings 字典:键值映射
- Sets: unordered unique elements 集合:无序唯一元素
- Real-world Applications 实际应用
- Statistics and data analysis 统计和数据分析
- Text processing and word frequency 文本处理和词频统计
- Object-oriented design with collections 使用集合的面向对象设计
Best Practices 最佳实践
- Choose appropriate collection type based on needs 根据需求选择合适的集合类型
- Use list comprehensions for readable code 使用列表推导式编写可读性强的代码
- Leverage built-in functions and methods 利用内置函数和方法
- Consider performance for large datasets 对于大数据集考虑性能因素
- Use encapsulation with object collections 使用对象集合的封装
通过掌握数据集合,可以高效地组织和处理各种复杂数据,构建强大的数据处理应用程序!