Title
Title
TIT2
title
-metadata title=”海阔天空”
Subtitle
Description(Video tab)
TIT3
TIT3
-metadata TIT3=”beyond 20周年纪念版”
Rating
n/a
n/a
n/a
n/a
Comments
Comments
COMM
n/a
n/a
Contributing artists
Artist
TPE1
artist
-metadata artist=”黄家驹”
Album artist
Album artist
TPE2
album_artist
-metadata album_artist=”Josh Groban”
Album
Album
TALB
album
-metadata album=”Closer”
Year
Year
TYER
date
-metadata date=”2009”
#
Track Number
TRCK
track
-metadata track=”3/12”(12首歌中的第3个)
Genre
Genre
TCON
genre
-metadata genre=”Vocal”
Publisher
n/a
TPUB
publisher
-metadata publisher=”Heaven Church”
Encoded by
n/a
TENC
encoded_by
-metadata encoded_by=”Joshua”
Author URL
n/a
WOAR
n/a
n/a
CopyRight(不可编辑)
n/a
TCOP
copyright
-metadata copyright=”℗ lqsoft”
Composers
n/a
TCOM
composer
-metadata composer=”Joshua”
Conductors
n/a
TPE3
performer
-metadata performer=”Joshua”
Group description
Grouping
TIT1
TIT1
-metadata TIT1=”The Classics”
Mood
n/a
n/a
n/a
n/a
Part of set
Disc Number
TPOS
disc
-metadata disc=”1/2”
Initial key
n/a
TKEY
TKEY
-metadata TKEY=”G”
Beats-per-minute
BPM
TBPM
TBPM
-metadata TBPM=”120”
Part of a compilation
Part of a compilation
TCMP
n/a
n/a
n/a
n/a
TLAN
language
-metadata language=”eng”
n/a
n/a
TSSE
encoder
-metadata encoder=”iTunes v10”
我们主要会用到title:标题即歌曲名、artist:艺术家即歌手、album:专辑、date:发布时间、composer:作曲。然后呢,发现并没有给可怜的文山兄弟留下一个作词的位置。回头再看,似乎作词、作曲一般是在歌词文件中出现,音乐文件的标签似乎一般有个歌名和歌手就行了。
由于店家的资源文件名中带有数字编号:
01.牛仔很忙.wav
01.说了再见.wav
所以先写个脚本重命名一下,顺带导出歌曲名单:
import os
import re
# Strip the leading "NN." track number from every .wav file in the album
# folder, then write the bare song titles (no extension) to song_list.txt.
number_prefix = re.compile(r"^[0-9]+\.")
# Anchored so that only a trailing ".wav" extension is recognised.
wav_suffix = re.compile(r"\.wav$")
music_dir = 'E:\\BaiduNetdiskDownload\\周杰伦'
os.chdir(music_dir)
titles = []
for old_name in os.listdir(music_dir):
    # Only touch audio files, so that re-running the script does not pull
    # song_list.txt (or any other stray file) into the song list.
    if not wav_suffix.search(old_name):
        continue
    new_name = number_prefix.sub("", old_name)
    titles.append(wav_suffix.sub("", new_name))
    # Files that carry no numeric prefix are already correctly named.
    if new_name != old_name:
        os.rename(old_name, new_name)
with open("song_list.txt", "w") as p:
    p.writelines(title + "\n" for title in titles)
名单效果如下(文件名则是带有“.wav”后缀):
七里香
世界末日
东风破
乔克叔叔
接下来是爬虫脚本:
from urllib import request
from urllib import parse
import re
import os
def getlist(file):
    """Return the non-empty lines of the text file at *file*, in order.

    The file is opened in text mode with the platform's default encoding
    and split on "\n"; empty lines (including the one produced by a
    trailing newline) are dropped.
    """
    with open(file, "r") as p:
        # One filtering pass instead of repeated list.remove('') calls, and
        # no local variable named `list` hiding the builtin.
        return [line for line in p.read().split("\n") if line]
def crawtext(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    Decoding is strict, so a body that is not valid UTF-8 raises
    UnicodeDecodeError. Errors raised by urlopen propagate unchanged.
    """
    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(url) as res:
        return res.read().decode(encoding='utf-8', errors='strict')
def isurl(patternlist, text):
    """Classify *text* against the two regexes in *patternlist*.

    Returns:
        1 if patternlist[0] is not found in text;
        0 if both patternlist[0] and patternlist[1] are found;
        2 if patternlist[0] is found but patternlist[1] is not.
    """
    if not re.search(patternlist[0], text):
        return 1
    return 0 if re.search(patternlist[1], text) else 2
def gettext(pattern, raw_text):
    """Return the first substring of *raw_text* matching *pattern*.

    Returns False (not None) when the pattern does not match.
    """
    found = re.search(pattern, raw_text)
    return found.group(0) if found else False
def geturl(pattern, patternlist, raw_text):
    """Extract a link key from the first match of *pattern* in *raw_text*.

    The matched text is cleaned by deleting everything that matches
    patternlist[0] and then everything that matches patternlist[1]; what
    remains is returned. Returns False when *pattern* does not match.
    """
    found = re.search(pattern, raw_text)
    if not found:
        return False
    start, end = found.span()
    without_prefix = re.sub(patternlist[0], "", raw_text[start:end])
    return re.sub(patternlist[1], "", without_prefix)
# Look up every song from song_list.txt on Baidu Baike and save, one line per
# song, either the page's <meta name="description"> tag or an error marker.

# urlopen() needs an explicit scheme; a bare host raises "unknown url type".
baseurl = r"https://baike.baidu.com/item/"
# pattern1[0]: the page has a disambiguation list with a selected entry;
# pattern1[1]: that selected entry is already the 周杰伦 song.
pattern1 = ['<li class="item">▪<span class="selected">', '<li class="item">▪<span class="selected">.*周杰伦.*歌曲.*</span></li>']
# Non-greedy so the match stops at the end of the meta tag even when the
# whole page arrives on a single line.
pattern2 = '<meta name="description" content=".*?">'
# A non-selected disambiguation entry that points at the 周杰伦 song.
pattern3 = '<li class="item">▪<a title=".*周杰伦.*歌曲.*>'
# Stripped from a pattern3 match to leave only the /item/ key of the link.
pattern4 = [".*href='/item/", "'>.*"]
music_dir = "E:\\BaiduNetdiskDownload\\周杰伦"
os.chdir(music_dir)
song_list = getlist("song_list.txt")
text_list = []
for song in song_list:
    # Escaped and anchored: only a trailing ".wav" extension is removed.
    name = re.sub(r"\.wav$", "", song)
    text = crawtext(baseurl + parse.quote(name))
    flag = isurl(pattern1, text)
    if flag == 2:
        # The landing page is a different entry of the same name; follow the
        # disambiguation link to the 周杰伦 song instead.
        key = geturl(pattern3, pattern4, text)
        if not key:
            text_list.append(name + " error 2 ")
            continue
        text = crawtext(baseurl + key)
    description = gettext(pattern2, text)
    # gettext returns False when the tag is missing; record a marker rather
    # than a value that cannot be written out as a string.
    if description:
        text_list.append(description)
    else:
        text_list.append(name + " error 1 ")
# Scraped text may contain characters outside the locale code page, so the
# output encoding is fixed to UTF-8.
with open("text.txt", "w", encoding="utf-8") as p:
    p.writelines(entry + "\n" for entry in text_list)
import os
import pydub
def getlist(file):
    """Return the non-empty lines of the text file at *file*, in order.

    The file is opened in text mode with the platform's default encoding
    and split on "\n"; empty lines (including the one produced by a
    trailing newline) are dropped.
    """
    with open(file, "r") as p:
        # One filtering pass instead of repeated list.remove('') calls, and
        # no local variable named `list` hiding the builtin.
        return [line for line in p.read().split("\n") if line]
class SONG:
    """Tag record for one song: title, artist, album, date and composer."""

    # Class-level defaults; an instance attribute of the same name takes
    # precedence once it has been assigned.
    title = artist = album = date = composer = ""

    def __init__(self, title):
        """Create a record whose title is *title*; other fields stay ""."""
        self.title = title
# Convert every song listed in list.txt from WAV to FLAC and embed its tags.
# Each line of list.txt is tab-separated: title, artist, album, date, composer.
music_dir = "E:\\BaiduNetdiskDownload\\周杰伦"
os.chdir(music_dir)
# exist_ok lets the script be re-run; os.mkdir raises if "test" exists.
os.makedirs("test", exist_ok=True)
tag_names = ("title", "artist", "album", "date", "composer")
for line in getlist("list.txt"):
    fields = line.split("\t")
    # The title doubles as the file name (without extension) of the source.
    song = pydub.AudioSegment.from_wav(fields[0] + ".wav")
    # Indexing (not zip) so a line with too few columns still raises
    # IndexError instead of silently writing incomplete tags.
    tags = {tag: fields[i] for i, tag in enumerate(tag_names)}
    # export() returns the open output file; close it so handles do not
    # accumulate over a large album.
    song.export(os.path.join("test", fields[0] + ".flac"), format="flac", tags=tags).close()
# The argument-less song.export() that used to follow was dropped: with no
# destination it only encodes one more copy into a temporary MP3 file.
Title
Title
TIT2
title
-metadata title=”海阔天空”
Subtitle
Description(Video tab)
TIT3
TIT3
-metadata TIT3=”beyond 20周年纪念版”
Rating
n/a
n/a
n/a
n/a
Comments
Comments
COMM
n/a
n/a
Contributing artists
Artist
TPE1
artist
-metadata artist=”黄家驹”
Album artist
Album artist
TPE2
album_artist
-metadata album_artist=”Josh Groban”
Album
Album
TALB
album
-metadata album=”Closer”
Year
Year
TYER
date
-metadata date=”2009”
#
Track Number
TRCK
track
-metadata track=”3/12”(12首歌中的第3个)
Genre
Genre
TCON
genre
-metadata genre=”Vocal”
Publisher
n/a
TPUB
publisher
-metadata publisher=”Heaven Church”
Encoded by
n/a
TENC
encoded_by
-metadata encoded_by=”Joshua”
Author URL
n/a
WOAR
n/a
n/a
CopyRight(不可编辑)
n/a
TCOP
copyright
-metadata copyright=”℗ lqsoft”
Composers
n/a
TCOM
composer
-metadata composer=”Joshua”
Conductors
n/a
TPE3
performer
-metadata performer=”Joshua”
Group description
Grouping
TIT1
TIT1
-metadata TIT1=”The Classics”
Mood
n/a
n/a
n/a
n/a
Part of set
Disc Number
TPOS
disc
-metadata disc=”1/2”
Initial key
n/a
TKEY
TKEY
-metadata TKEY=”G”
Beats-per-minute
BPM
TBPM
TBPM
-metadata TBPM=”120”
Part of a compilation
Part of a compilation
TCMP
n/a
n/a
n/a
n/a
TLAN
language
-metadata language=”eng”
n/a
n/a
TSSE
encoder
-metadata encoder=”iTunes v10”
我们主要会用到title:标题即歌曲名、artist:艺术家即歌手、album:专辑、date:发布时间、composer:作曲。然后呢,发现并没有给可怜的文山兄弟留下一个作词的位置。回头再看,似乎作词、作曲一般是在歌词文件中出现,音乐文件的标签似乎一般有个歌名和歌手就行了。
由于店家的资源文件名中带有数字编号:
01.牛仔很忙.wav
01.说了再见.wav
所以先写个脚本重命名一下,顺带导出歌曲名单:
import os
import re
# Strip the leading "NN." track number from every .wav file in the album
# folder, then write the bare song titles (no extension) to song_list.txt.
number_prefix = re.compile(r"^[0-9]+\.")
# Anchored so that only a trailing ".wav" extension is recognised.
wav_suffix = re.compile(r"\.wav$")
music_dir = 'E:\\BaiduNetdiskDownload\\周杰伦'
os.chdir(music_dir)
titles = []
for old_name in os.listdir(music_dir):
    # Only touch audio files, so that re-running the script does not pull
    # song_list.txt (or any other stray file) into the song list.
    if not wav_suffix.search(old_name):
        continue
    new_name = number_prefix.sub("", old_name)
    titles.append(wav_suffix.sub("", new_name))
    # Files that carry no numeric prefix are already correctly named.
    if new_name != old_name:
        os.rename(old_name, new_name)
with open("song_list.txt", "w") as p:
    p.writelines(title + "\n" for title in titles)
名单效果如下(文件名则是带有“.wav”后缀):
七里香
世界末日
东风破
乔克叔叔
接下来是爬虫脚本:
from urllib import request
from urllib import parse
import re
import os
def getlist(file):
    """Return the non-empty lines of the text file at *file*, in order.

    The file is opened in text mode with the platform's default encoding
    and split on "\n"; empty lines (including the one produced by a
    trailing newline) are dropped.
    """
    with open(file, "r") as p:
        # One filtering pass instead of repeated list.remove('') calls, and
        # no local variable named `list` hiding the builtin.
        return [line for line in p.read().split("\n") if line]
def crawtext(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    Decoding is strict, so a body that is not valid UTF-8 raises
    UnicodeDecodeError. Errors raised by urlopen propagate unchanged.
    """
    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(url) as res:
        return res.read().decode(encoding='utf-8', errors='strict')
def isurl(patternlist, text):
    """Classify *text* against the two regexes in *patternlist*.

    Returns:
        1 if patternlist[0] is not found in text;
        0 if both patternlist[0] and patternlist[1] are found;
        2 if patternlist[0] is found but patternlist[1] is not.
    """
    if not re.search(patternlist[0], text):
        return 1
    return 0 if re.search(patternlist[1], text) else 2
def gettext(pattern, raw_text):
    """Return the first substring of *raw_text* matching *pattern*.

    Returns False (not None) when the pattern does not match.
    """
    found = re.search(pattern, raw_text)
    return found.group(0) if found else False
def geturl(pattern, patternlist, raw_text):
    """Extract a link key from the first match of *pattern* in *raw_text*.

    The matched text is cleaned by deleting everything that matches
    patternlist[0] and then everything that matches patternlist[1]; what
    remains is returned. Returns False when *pattern* does not match.
    """
    found = re.search(pattern, raw_text)
    if not found:
        return False
    start, end = found.span()
    without_prefix = re.sub(patternlist[0], "", raw_text[start:end])
    return re.sub(patternlist[1], "", without_prefix)
# Look up every song from song_list.txt on Baidu Baike and save, one line per
# song, either the page's <meta name="description"> tag or an error marker.

# urlopen() needs an explicit scheme; a bare host raises "unknown url type".
baseurl = r"https://baike.baidu.com/item/"
# pattern1[0]: the page has a disambiguation list with a selected entry;
# pattern1[1]: that selected entry is already the 周杰伦 song.
pattern1 = ['<li class="item">▪<span class="selected">', '<li class="item">▪<span class="selected">.*周杰伦.*歌曲.*</span></li>']
# Non-greedy so the match stops at the end of the meta tag even when the
# whole page arrives on a single line.
pattern2 = '<meta name="description" content=".*?">'
# A non-selected disambiguation entry that points at the 周杰伦 song.
pattern3 = '<li class="item">▪<a title=".*周杰伦.*歌曲.*>'
# Stripped from a pattern3 match to leave only the /item/ key of the link.
pattern4 = [".*href='/item/", "'>.*"]
music_dir = "E:\\BaiduNetdiskDownload\\周杰伦"
os.chdir(music_dir)
song_list = getlist("song_list.txt")
text_list = []
for song in song_list:
    # Escaped and anchored: only a trailing ".wav" extension is removed.
    name = re.sub(r"\.wav$", "", song)
    text = crawtext(baseurl + parse.quote(name))
    flag = isurl(pattern1, text)
    if flag == 2:
        # The landing page is a different entry of the same name; follow the
        # disambiguation link to the 周杰伦 song instead.
        key = geturl(pattern3, pattern4, text)
        if not key:
            text_list.append(name + " error 2 ")
            continue
        text = crawtext(baseurl + key)
    description = gettext(pattern2, text)
    # gettext returns False when the tag is missing; record a marker rather
    # than a value that cannot be written out as a string.
    if description:
        text_list.append(description)
    else:
        text_list.append(name + " error 1 ")
# Scraped text may contain characters outside the locale code page, so the
# output encoding is fixed to UTF-8.
with open("text.txt", "w", encoding="utf-8") as p:
    p.writelines(entry + "\n" for entry in text_list)
import os
import pydub
def getlist(file):
    """Return the non-empty lines of the text file at *file*, in order.

    The file is opened in text mode with the platform's default encoding
    and split on "\n"; empty lines (including the one produced by a
    trailing newline) are dropped.
    """
    with open(file, "r") as p:
        # One filtering pass instead of repeated list.remove('') calls, and
        # no local variable named `list` hiding the builtin.
        return [line for line in p.read().split("\n") if line]
class SONG:
    """Tag record for one song: title, artist, album, date and composer."""

    # Class-level defaults; an instance attribute of the same name takes
    # precedence once it has been assigned.
    title = artist = album = date = composer = ""

    def __init__(self, title):
        """Create a record whose title is *title*; other fields stay ""."""
        self.title = title
# Convert every song listed in list.txt from WAV to FLAC and embed its tags.
# Each line of list.txt is tab-separated: title, artist, album, date, composer.
music_dir = "E:\\BaiduNetdiskDownload\\周杰伦"
os.chdir(music_dir)
# exist_ok lets the script be re-run; os.mkdir raises if "test" exists.
os.makedirs("test", exist_ok=True)
tag_names = ("title", "artist", "album", "date", "composer")
for line in getlist("list.txt"):
    fields = line.split("\t")
    # The title doubles as the file name (without extension) of the source.
    song = pydub.AudioSegment.from_wav(fields[0] + ".wav")
    # Indexing (not zip) so a line with too few columns still raises
    # IndexError instead of silently writing incomplete tags.
    tags = {tag: fields[i] for i, tag in enumerate(tag_names)}
    # export() returns the open output file; close it so handles do not
    # accumulate over a large album.
    song.export(os.path.join("test", fields[0] + ".flac"), format="flac", tags=tags).close()
# The argument-less song.export() that used to follow was dropped: with no
# destination it only encodes one more copy into a temporary MP3 file.