利用豆瓣FM的API抓取豆瓣音乐

# -*- coding: utf-8 -*-
__author__ = 'zhan'

#该程序为测试豆瓣FM的程序
import json
import urllib
import urllib2
import requests
import requests.exceptions
import traceback

# printf-style templates used to build identifiers for Douban resources.
DOUBAN_CHANNEL_UUID_FORMAT = 'douban-%d'    # filled with (channel_id)
DOUBAN_MUSIC_UUID_FORMAT = 'douban-%d-%d'   # filled with (aid, sid)

# Session credentials; all start unset and are overwritten by login() on success.
_user_id = _token = _expire = None

# Log in and cache the token / expire values used by later API calls.
def login(email='***', password='***'):
    '''Log in to Douban FM and store the session credentials.

    Posts the credentials to the login endpoint and, on success, stores
    the returned ``user_id``, ``token`` and ``expire`` values in the
    module-level ``_user_id``, ``_token`` and ``_expire`` globals.

    email, password -- account credentials; they default to the '***'
        placeholders that were hard-coded in this file.

    Returns True on success and False on any failure (network error,
    timeout, non-JSON response, non-zero ``r`` status, missing fields).

    NOTE(review): the credentials are posted over plain HTTP, as in the
    original code -- confirm whether the endpoint accepts HTTPS.
    '''
    global _user_id, _token, _expire
    payload = {'app_name': 'radio_desktop_win',
               'version': '100',
               'email': email,
               'password': password}
    try:
        # Without an explicit timeout requests waits indefinitely, so the
        # Timeout handler below could never fire.
        response = requests.post("http://www.douban.com/j/app/login",
                                 data=payload, timeout=5)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout):
        return False

    try:
        result = json.loads(response.text)
    except ValueError:
        # e.g. an HTML error page instead of the expected JSON document
        print('spider.douban.login: failed. response is not valid JSON')
        return False

    if not isinstance(result, dict) or result.get('r') != 0:
        print('spider.douban.login: failed. r= %s' % (result,))
        return False

    try:
        # Read every field first so the globals are never half-updated.
        user_id = result['user_id']
        token = result['token']
        expire = result['expire']
    except KeyError:
        print('spider.douban.login: failed. r= %s' % (result,))
        return False

    _user_id, _token, _expire = user_id, token, expire
    return True

# Fetch the list of channels.
def update_channel_list():
    '''Fetch the Douban FM channel list and return the channel names.

    Channels whose ``channel_id`` is 0 are skipped (the original comment
    describes that one as the private list).

    Returns a list of channel names. On a connection error, timeout,
    non-JSON response or a response without a ``channels`` entry, a
    diagnostic is printed and an empty list is returned, mirroring how
    update_channel_once() reports network failures.
    '''
    try:
        response = requests.get("http://www.douban.com/j/app/radio/channels",
                                timeout=5)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout):
        traceback.print_exc()
        return []

    try:
        result = json.loads(response.text)
    except ValueError:
        traceback.print_exc()
        return []

    # Explicit check instead of `assert`, which is stripped under -O.
    if not isinstance(result, dict) or 'channels' not in result:
        print('spider.douban.update_channel_list: failed. r= %s' % (result,))
        return []

    return [channel['name']
            for channel in result['channels']
            if int(channel['channel_id']) != 0]

def update_channel_once(channel, max_num=10):
    '''Fetch one playlist for `channel` and return the songs that were reachable.

    Call login() first so the module-level credentials are populated.

    channel -- channel id sent as the ``channel`` request parameter.
    max_num -- maximum number of songs to return; None means no limit.

    For each playlist entry with numeric ``aid``/``sid`` values, the cover
    and audio URLs are opened as streamed requests to check that they are
    reachable, the song details are printed, and a metadata dict is
    appended to the result. Both responses are closed again before moving
    on, so no connections are left open.

    Returns a list of dicts with the keys ``uuid``, ``title``, ``artist``,
    ``albumtitle``, ``company``, ``public_time``, ``kbps``, ``picture``
    and ``url`` (descriptive fields missing from the response are None).
    Returns an empty list when the playlist request fails, the response
    is not JSON, or the API reports a non-zero ``r`` status.
    '''
    payload = {'app_name': 'radio_desktop_win',
               'version': '100',
               'user_id': _user_id,
               'expire': _expire,
               'token': _token,
               'channel': channel,
               'type': 'n'}

    try:
        print('getting list')
        response = requests.get("http://www.douban.com/j/app/radio/people",
                                params=payload, timeout=5)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout):
        traceback.print_exc()
        return []

    try:
        result = json.loads(response.text)
    except ValueError:
        traceback.print_exc()
        return []

    # Explicit check instead of `assert`, which is stripped under -O.
    if not isinstance(result, dict) or result.get('r') != 0:
        print('spider.douban.update_channel_once: failed. r= %s' % (result,))
        return []

    update_music = []
    for song in result.get('song', []):
        if max_num is not None and len(update_music) >= max_num:
            break

        try:
            music_uuid = DOUBAN_MUSIC_UUID_FORMAT % (int(song['aid']),
                                                     int(song['sid']))
        except (KeyError, TypeError, ValueError):
            # Entries without numeric aid/sid are ads; skip them.
            continue
        print(music_uuid)

        cover_resp = audio_resp = None
        try:
            print('getting song')
            cover_resp = requests.get(song['picture'], stream=True, timeout=5)
            audio_resp = requests.get(song['url'], stream=True, timeout=5)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout):
            traceback.print_exc()
            continue
        else:
            record = {'uuid': music_uuid,
                      'title': song.get('title'),
                      'artist': song.get('artist'),
                      'albumtitle': song.get('albumtitle'),
                      'company': song.get('company'),
                      'public_time': song.get('public_time'),
                      'kbps': song.get('kbps'),
                      'picture': song['picture'],
                      'url': song['url']}
            # Printed as one tuple, matching the original Python 2 output.
            print((record['title'], record['artist'], record['albumtitle'],
                   record['company'], record['public_time'], record['kbps'],
                   cover_resp.raw, audio_resp.raw, music_uuid))
            update_music.append(record)
        finally:
            # Release the streamed connections whether or not both opened.
            for resp in (cover_resp, audio_resp):
                if resp is not None:
                    resp.close()

    return update_music

# Manual smoke test: log in, print every channel name, then request one
# playlist for channel 1.
login()
channels = update_channel_list()
for channel in channels:
    print(channel)

print('--------------------------')

musics = update_channel_once(channel=1, max_num=10)