Python 检测歌曲文件是否包含内嵌歌词

安装 mutagen 模块:

pip3 install mutagen

使用如下脚本即可递归检测脚本所在目录下所有子目录中的音频文件是否包含内嵌歌词;也可以把要检测的目录作为第一个命令行参数传入,例如 python3 check_lyrics.py /path/to/music(脚本文件名以实际保存的为准)。

import os
import sys
from mutagen import File
from mutagen.id3 import ID3, USLT
from mutagen.mp4 import MP4
 
# File extensions (lowercase, leading dot included) that the directory scan
# treats as audio files; anything else found while walking is skipped.
AUDIO_EXTENSIONS = {'.mp3', '.flac', '.m4a', '.mp4', '.ogg', '.oga', '.wav', '.aac', '.opus'}
 
def _contains_text(value):
    """Return True if *value*, or any item of a list/tuple value, is non-blank text."""
    if value is None:
        return False
    if isinstance(value, (list, tuple)):
        return any(_contains_text(item) for item in value)
    if isinstance(value, (bytes, bytearray)):
        # str(b"") is "b''", which never looks blank, so byte payloads
        # (e.g. MP4 freeform atoms) have to be decoded before the check.
        value = bytes(value).decode("utf-8", errors="ignore")
    return bool(str(value).strip())


def has_lyrics(audio, filepath):
    """Check whether an audio file carries non-empty embedded lyrics.

    Args:
        audio: The object returned by ``mutagen.File`` for the file, or
            ``None`` when the file could not be recognised. Any mapping-like
            object exposing ``keys()`` and ``get()`` is accepted for the
            generic tag scan.
        filepath: Path of the file; its extension selects the
            format-specific checks.

    Returns:
        True if any lyrics field with non-blank content is found, else False.
        An MP3 whose ID3 tag cannot be parsed is reported as False after a
        warning is printed.
    """
    if audio is None:
        return False

    lowered_path = filepath.lower()

    # MP3: look for USLT (unsynchronised lyrics) frames in the ID3 tag.
    if lowered_path.endswith(".mp3"):
        try:
            tags = ID3(filepath)
        except Exception as e:
            print(f"⚠️ [ID3解析失败] {filepath}: {e}")
            return False
        if any(
            isinstance(frame, USLT) and _contains_text(frame.text)
            for frame in tags.values()
        ):
            return True

    # M4A / MP4: the iTunes lyrics atom plus a freeform field some tools write.
    if lowered_path.endswith((".m4a", ".mp4")) and isinstance(audio, MP4):
        mp4_tags = audio.tags
        # tags can be None when the file has no metadata at all; that simply
        # means "no lyrics" rather than an unreadable file.
        if mp4_tags is not None:
            if _contains_text(mp4_tags.get("©lyr")):
                return True
            if _contains_text(mp4_tags.get("----:com.apple.iTunes:LYRICS")):
                return True

    # Generic formats (FLAC / OGG / OPUS ...): any tag whose name mentions
    # "lyric". Keep scanning past blank fields so that a later, non-empty
    # lyrics tag is still detected.
    for key in audio.keys():
        if "lyric" in key.lower() and _contains_text(audio.get(key)):
            return True

    return False
 
def find_songs_without_lyrics(base_dir):
    """Walk *base_dir* recursively and sort audio files by lyrics presence.

    Every file whose extension is in ``AUDIO_EXTENSIONS`` is opened with
    ``File`` and passed to ``has_lyrics``. Files without lyrics are printed
    as they are found; files that raise while being read are reported and
    left out of both lists. A summary is printed at the end.

    Args:
        base_dir: Root directory of the scan.

    Returns:
        A ``(has_lyrics_files, no_lyrics_files)`` tuple of path lists.
    """
    with_lyrics = []
    without_lyrics = []

    for dirpath, _subdirs, filenames in os.walk(base_dir):
        for name in filenames:
            _stem, suffix = os.path.splitext(name)
            if suffix.lower() not in AUDIO_EXTENSIONS:
                continue

            full_path = os.path.join(dirpath, name)
            try:
                if not has_lyrics(File(full_path), full_path):
                    print(f"❌ 无歌词: {full_path}")
                    without_lyrics.append(full_path)
                else:
                    with_lyrics.append(full_path)
            except Exception as err:
                print(f"⚠️ 无法读取文件: {full_path} 错误: {err}")

    # Summary of the scan
    summary_lines = (
        "\n📊 总结:",
        f"✅ 有歌词文件数: {len(with_lyrics)}",
        f"❌ 无歌词文件数: {len(without_lyrics)}",
        f"📁 扫描路径: {base_dir}",
    )
    for line in summary_lines:
        print(line)

    return with_lyrics, without_lyrics
 
if __name__ == "__main__":
    # Scan the directory given as the first CLI argument; without one,
    # fall back to the directory that contains this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    target_dir = sys.argv[1] if len(sys.argv) > 1 else script_dir

    find_songs_without_lyrics(target_dir)
THE END