← python-pptx | filecmp →

difflib

difflib 是 Python 内置的模块,用于比较序列,生成差异报告,支持多种输出格式。

比较字符串

import difflib

text1 = "Hello, World!"
text2 = "Hello, Python!"

# Compare the two strings element by element (here: character by character).
# isjunk=None means no character is treated as junk.
matcher = difflib.SequenceMatcher(isjunk=None, a=text1, b=text2)

# Similarity score of the two sequences, printed with two decimals.
ratio = matcher.ratio()
print(f"相似度: {ratio:.2f}")

# Every matching block is an (a, b, size) triple: text1[a:a+size] equals
# text2[b:b+size]. Unpack it directly instead of going through attributes.
for a_start, b_start, length in matcher.get_matching_blocks():
    a_end = a_start + length
    b_end = b_start + length
    print(f"匹配: text1[{a_start}:{a_end}] = text2[{b_start}:{b_end}]")

统一差异格式

import difflib

text1 = """Line 1
Line 2
Line 3
Line 4"""

text2 = """Line 1
Line 2 modified
Line 3
Line 4 added"""

# Build a unified diff.
#
# The inputs are split WITHOUT their line endings and lineterm='' tells
# unified_diff not to append one to the control lines ("---", "+++", "@@")
# either, so every yielded line is newline-free. Mixing keepends=True with
# lineterm='' (as this example previously did) leaves the headers without a
# newline while content lines keep theirs, and the last line of each text has
# no newline at all, so a plain ''.join() glues lines together.
diff = difflib.unified_diff(
    text1.splitlines(),
    text2.splitlines(),
    fromfile='file1.txt',
    tofile='file2.txt',
    lineterm='',
)

# Join with an explicit newline so each diff line lands on its own row.
diff_text = '\n'.join(diff)
print(diff_text)

上下文差异格式

import difflib

text1 = """Line 1
Line 2
Line 3
Line 4"""

text2 = """Line 1
Line 2 modified
Line 3
Line 4 added"""

# Build a context diff.
#
# The last line of each text has no trailing newline, so splitting with
# keepends=True and joining with '' would print "! Line 4" and the following
# "--- 1,4 ----" marker on the same row. Splitting without line endings and
# passing lineterm='' makes every yielded line newline-free; the newline is
# then added uniformly when joining.
diff = difflib.context_diff(
    text1.splitlines(),
    text2.splitlines(),
    fromfile='file1.txt',
    tofile='file2.txt',
    lineterm='',
)

# One diff line per output row.
diff_text = '\n'.join(diff)
print(diff_text)

HTML 差异格式

import difflib

text1 = "Hello, World!"
text2 = "Hello, Python!"

# make_table() works on lists of lines, so split both inputs first.
from_lines = text1.splitlines()
to_lines = text2.splitlines()

# Render a side-by-side HTML <table> (just the table, not a full HTML page);
# the two descriptions become the column headers.
html_differ = difflib.HtmlDiff()
diff = html_differ.make_table(from_lines, to_lines, 'Text 1', 'Text 2')

print(diff)

比较文件

import difflib

# Load each file as a list of lines; iterating a text file yields the lines
# with their line endings intact, exactly like readlines().
with open('file1.txt', 'r') as f:
    text1 = list(f)

with open('file2.txt', 'r') as f:
    text2 = list(f)

# Build the unified diff lazily from the two line lists.
diff = difflib.unified_diff(text1, text2, fromfile='file1.txt', tofile='file2.txt')

# The diff lines already carry their own line endings, so write them out
# back to back without adding any extra newline.
print(''.join(diff), end='')

NDiff 格式

import difflib

text1 = "Hello, World!"
text2 = "Hello, Python!"

# ndiff() compares lists of lines, so split both strings first.
old_lines = text1.splitlines()
new_lines = text2.splitlines()

# Produce the ndiff-style delta ("- " / "+ " / "  " prefixed lines).
diff = difflib.ndiff(old_lines, new_lines)

# Print one delta line per row.
print(*diff, sep='\n')

获取差异操作

import difflib

text1 = "Hello, World!"
text2 = "Hello, Python!"


def _describe(tag, old_part, new_part):
    """Return the detail line for one opcode, or None for an unknown tag."""
    if tag == 'replace':
        return f"  替换: '{old_part}' -> '{new_part}'"
    if tag == 'delete':
        return f"  删除: '{old_part}'"
    if tag == 'insert':
        return f"  插入: '{new_part}'"
    if tag == 'equal':
        return f"  相等: '{old_part}'"
    return None


# Opcodes describe how to turn text1 into text2, one slice pair at a time.
matcher = difflib.SequenceMatcher(None, text1, text2)

for tag, i1, i2, j1, j2 in matcher.get_opcodes():
    print(f"{tag}: text1[{i1}:{i2}] -> text2[{j1}:{j2}]")
    detail = _describe(tag, text1[i1:i2], text2[j1:j2])
    if detail is not None:
        print(detail)
💡 提示:difflib 适合文本比较、版本控制、差异分析等场景。
← python-pptx | filecmp →