difflib
difflib 是 Python 标准库中的模块,用于比较序列(例如字符串或文本行列表),生成差异报告,支持多种输出格式。
比较字符串
import difflib
# Compare two strings character by character with SequenceMatcher.
text1 = "Hello, World!"
text2 = "Hello, Python!"

# First argument is the junk predicate; None means no element is junk.
matcher = difflib.SequenceMatcher(None, text1, text2)

# Similarity score in [0, 1].
ratio = matcher.ratio()
print(f"相似度: {ratio:.2f}")

# get_matching_blocks() always ends with a zero-length sentinel
# (len(a), len(b), 0); drop it so only real matches are reported.
matches = [blk for blk in matcher.get_matching_blocks() if blk.size]
for blk in matches:
    print(f"匹配: text1[{blk.a}:{blk.a + blk.size}] = text2[{blk.b}:{blk.b + blk.size}]")
统一差异格式
import difflib
text1 = """Line 1
Line 2
Line 3
Line 4"""
text2 = """Line 1
Line 2 modified
Line 3
Line 4 added"""

# Build the unified diff.
# The inputs are split WITHOUT line endings and lineterm='' keeps the
# generated header/hunk lines newline-free as well, so every output line
# is uniform and can be joined with '\n'. Mixing keepends=True with
# lineterm='' (or feeding a last line that lacks a newline) glues
# neighbouring diff lines together.
diff = list(difflib.unified_diff(
    text1.splitlines(),
    text2.splitlines(),
    fromfile='file1.txt',
    tofile='file2.txt',
    lineterm='',
))
print('\n'.join(diff))
上下文差异格式
import difflib
text1 = """Line 1
Line 2
Line 3
Line 4"""
text2 = """Line 1
Line 2 modified
Line 3
Line 4 added"""

# Build the context diff.
# The last input line has no trailing newline, so keeping line endings
# and joining with '' would fuse "! Line 4" with the following
# "--- 1,4 ----" separator. Strip all line endings, ask difflib for
# newline-free control lines (lineterm=''), and join with '\n' instead.
diff = list(difflib.context_diff(
    text1.splitlines(),
    text2.splitlines(),
    fromfile='file1.txt',
    tofile='file2.txt',
    lineterm='',
))
print('\n'.join(diff))
HTML 差异格式
import difflib
text1 = "Hello, World!"
text2 = "Hello, Python!"

# Render a side-by-side HTML comparison table of the two texts.
# make_table() returns only the <table> markup, not a full HTML page.
from_lines = text1.splitlines()
to_lines = text2.splitlines()
html_differ = difflib.HtmlDiff()
diff = html_differ.make_table(from_lines, to_lines, fromdesc='Text 1', todesc='Text 2')
print(diff)
比较文件
import difflib
# Read both files as lists of lines; readlines() keeps the line endings,
# which matches unified_diff's default lineterm='\n'.
# An explicit encoding is passed because open() otherwise falls back to
# the platform's locale encoding, which varies between machines.
with open('file1.txt', 'r', encoding='utf-8') as f:
    text1 = f.readlines()
with open('file2.txt', 'r', encoding='utf-8') as f:
    text2 = f.readlines()

# Build the unified diff lazily (unified_diff returns a generator).
diff = difflib.unified_diff(text1, text2, fromfile='file1.txt', tofile='file2.txt')

# Emit the diff; each line already carries its own newline, so suppress
# the one print() would add.
for line in diff:
    print(line, end='')
NDiff 格式
import difflib
text1 = "Hello, World!"
text2 = "Hello, Python!"

# Build an ndiff-style delta: each output line starts with a two-character
# code ('- ', '+ ', '  ' or '? ').
# ndiff() returns a one-shot generator; materialise it so the result can
# still be inspected after it has been printed.
diff = list(difflib.ndiff(text1.splitlines(), text2.splitlines()))
for line in diff:
    print(line)
获取差异操作
import difflib
text1 = "Hello, World!"
text2 = "Hello, Python!"

# Walk the edit script that turns text1 into text2.
# Each opcode is (tag, i1, i2, j1, j2): text1[i1:i2] maps to text2[j1:j2],
# and tag is one of 'replace', 'delete', 'insert' or 'equal'.
matcher = difflib.SequenceMatcher(None, text1, text2)
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
    print(f"{tag}: text1[{i1}:{i2}] -> text2[{j1}:{j2}]")
    if tag == 'replace':
        print(f" 替换: '{text1[i1:i2]}' -> '{text2[j1:j2]}'")
    elif tag == 'delete':
        print(f" 删除: '{text1[i1:i2]}'")
    elif tag == 'insert':
        print(f" 插入: '{text2[j1:j2]}'")
    elif tag == 'equal':
        print(f" 相等: '{text1[i1:i2]}'")
💡 提示:difflib 适合文本比较、版本控制、差异分析等场景。