《Think Python》（3）：列表、字典、元组

Posted on 2015-01-11 | In Python

第十章列表

列表是可变的，列表中的值称为元素（element），或列表项（item）。
列表添加元素（append 和 extend 的返回值均为 None）：
- append可以在列表尾部添加新的元素：t.append(str)；
- extend可以接受一个列表作为一个参数，并将其所有元素添加到列表中：t1.extend(t2)；
- t = t + [x]（需要新建list，耗时太长）。
列表删除元素：
- pop修改列表，并返回被删除掉的值。如果不提供下标，它会删除并返回最后一个元素：x = t.pop(index)
- del操作符：del t[n]
- remove删除列表中的某个元素，返回值为None：t.remove(element)

#Chapter 10
#2014-12-11
#Exercise 10-1
def nested_sum(t):
    """Return the sum of every number in a possibly nested list.

    Elements that are lists are summed recursively; all other elements are
    added as they are.  The argument is only read, never modified.
    """
    total = 0
    for item in t:
        if isinstance(item, list):
            total += nested_sum(item)  # recurse into the sub-list
        else:
            total += item
    return total
#Exercise 10-2
def capitalize_all(t):
    """Return a new list holding the capitalized form of each string in t."""
    return [text.capitalize() for text in t]
def capitalize_nested(t):
    """Return a capitalized copy of a possibly nested list of strings.

    Sub-lists are processed recursively and keep their nesting in the
    result.  The argument is only read, never modified.
    """
    res = []
    for item in t:
        if isinstance(item, list):
            res.append(capitalize_nested(item))  # keep the nested shape
        else:
            res.append(item.capitalize())
    return res
#Exercise 10-3
def list_sum(t):
    """Return the cumulative sums of t.

    Element i of the result is t[0] + ... + t[i].  A running total is
    carried along so each element is added exactly once.
    """
    res = []
    total = 0
    for value in t:
        total += value
        res.append(total)
    return res
#Exercise 10-4
def middle(t):
    """Return a new sequence with everything but the first and last items."""
    inner = t[1:-1]
    return inner
#Exercise 10-5
def chop(t):
    """Remove the first and last items of t in place; return None.

    Raises IndexError when t has fewer than two items.
    """
    del t[0]
    del t[-1]  # last item of what is left after dropping the first
"""
t = [1, 2, 3, 4]
chop(t)
print t
"""
#Exercise 10-6
def is_sorted(t):
    """Return True if t is in ascending order (equal neighbours allowed).

    Empty and single-item sequences count as sorted.
    """
    for i in range(len(t) - 1):
        if t[i] > t[i + 1]:
            return False
    return True
#Exercise 10-7
def is_anagram(a, b):
    """Return True if a and b consist of exactly the same letters.

    Comparing the sorted letters takes both the length and the number of
    occurrences of each letter into account.
    """
    return sorted(a) == sorted(b)
#Exercise 10-8
def has_duplicates(t):
    """Return True if some item of t occurs again later in t."""
    return any(item in t[pos + 1:] for pos, item in enumerate(t))
def has_duplicates_demo(t):
    """Return True if any item occurs more than once in t, else False.

    Works on a sorted copy, so equal items end up next to each other and
    t itself is left unchanged.
    """
    ordered = t[:]
    ordered.sort()
    for left, right in zip(ordered, ordered[1:]):
        if left == right:
            return True
    return False
import random
def one_birthday():
    """Return a random birthday as a 'month-day' string, e.g. '2-17'.

    The month is drawn uniformly from 1-12 and the day uniformly from the
    days of that month.  February always has 28 days (no leap years).
    """
    days_in_month = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    month = random.randint(1, 12)
    # Look the month length up instead of chaining `or` on bare numbers,
    # which is always true and would allow dates such as '2-31'.
    day = random.randint(1, days_in_month[month - 1])
    return str(month) + '-' + str(day)
def multi_birthday(n):
    """Return a list of n birthdays, each produced by one_birthday()."""
    return [one_birthday() for _ in range(n)]
def percent_same(students, sample):
    """Print the fraction of simulated classes that share a birthday.

    students: number of birthdays generated per class.
    sample: number of classes to simulate; must be non-zero, otherwise the
        final division raises ZeroDivisionError.

    Prints the fraction (0.0 - 1.0) and returns None.
    """
    count = 0
    for _ in range(sample):
        if has_duplicates(multi_birthday(students)):
            count += 1
    percent = float(count) / sample
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(percent)
#2014-12-12
#Exercise 10-9
def remove_duplicates(t):
    """Return a new sorted list with the distinct items of t.

    The argument is left unchanged; an empty input yields an empty list.
    """
    res = []
    for item in sorted(t):
        # After sorting, duplicates are adjacent: compare with the last kept.
        if not res or item != res[-1]:
            res.append(item)
    return res
#Exercise 10-10
import time
def word_list_append(filename='words.txt'):
    """Time building a word list with list.append.

    Reads one word per line from `filename` and returns the elapsed
    wall-clock time in seconds as measured with time.time().
    """
    start_time = time.time()
    res = []
    with open(filename) as fin:  # closed even if reading fails
        for line in fin:
            res.append(line.strip())
    return time.time() - start_time
def word_list_append2(filename='words.txt'):
    """Time building a word list with list.append.

    Reads one word per line from `filename` and returns the elapsed time
    in seconds.
    """
    # time.clock no longer exists in Python 3.8+; use perf_counter when it
    # is available and fall back to clock on older interpreters.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start_time = timer()
    res = []
    with open(filename) as fin:  # closed even if reading fails
        for line in fin:
            res.append(line.strip())
    return timer() - start_time
def word_list_plus(filename='words.txt'):
    """Time building a word list with `res = res + [word]`.

    Reads one word per line from `filename` and returns the elapsed time
    in seconds.  The list concatenation is kept on purpose: this function
    exists to be compared against the append-based versions.
    """
    # time.clock no longer exists in Python 3.8+; use perf_counter when it
    # is available and fall back to clock on older interpreters.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start_time = timer()
    res = []
    with open(filename) as fin:  # closed even if reading fails
        for line in fin:
            res = res + [line.strip()]  # builds a brand-new list each time
    return timer() - start_time

第十一章字典

字典是可变的，键必须是可散列的（不可变类型，如整数、浮点数、字符串、元组等），字典中各项的顺序是不可预料的。
in操作符可以用在字典上，告诉你某个值是不是字典的键：
1
2
d = dict()
key in d

字典的一些方法：

get(key[, default]) ：如果key是字典的键，则返回这个键对应的值，否则返回default

#Exercise 11-2
def histogram(s):
    """Return a dict mapping each item of s to how many times it occurs."""
    counts = {}
    for item in s:
        if item in counts:
            counts[item] += 1
        else:
            counts[item] = 1
    return counts
#2014-12-16
#Exercise 11-1
def search_dict(t, filename='words.txt'):
    """Return True if t is one of the words listed in `filename`.

    The file is read one word per line (surrounding whitespace stripped)
    into a dict whose keys are the words, then t is looked up as a key.
    """
    with open(filename) as fin:  # closed even if reading fails
        words = dict.fromkeys((line.strip() for line in fin), '')
    return t in words
from bisect import bisect_left
def search_list(t, filename='words.txt'):
    """Return True if t is one of the words listed in `filename`.

    The file is read one word per line into a list and searched with
    bisect_left.  NOTE(review): bisection only gives correct answers when
    the lines of the file are already in sorted order - confirm for any
    file other than the word list this was written for.
    """
    with open(filename) as fin:  # closed even if reading fails
        word_list = [line.strip() for line in fin]
    i = bisect_left(word_list, t)
    # i is where t would be inserted; t is present only if it is already there.
    return i != len(word_list) and word_list[i] == t
'''dict: faster
import time
start = time.time()
print search_list('zymases')
end = time.time()
print end-start
'''
#Exercise 11-2
def histogram(s):
    """Count the occurrences of each item of s and return them as a dict."""
    tally = {}
    for item in s:
        try:
            tally[item] += 1
        except KeyError:
            tally[item] = 1  # first time this item is seen
    return tally
#Exercise 11-3
def print_hist(h):
    """Print one line per key of h: running index, key and value.

    The three fields are separated by single spaces.  Keys appear in the
    dict's own iteration order.
    """
    for i, key in enumerate(h):
        # One pre-formatted string prints identically on Python 2 and 3.
        print('%s %s %s' % (i, key, h[key]))
'''
h = histogram('parrot')
print_hist(h)
'''
#2014-12-22
#Exercise 11-4
def reverse_lookup(d, v):
    """Return a list of every key of d whose value equals v.

    The list is empty when no key maps to v.
    """
    return [key for key in d if d[key] == v]
'''
h = histogram('parrot')
print h
print reverse_lookup(h, 3)
'''
#Exercise 11-5
def invert_dict(d):
    """Return a dict mapping each value of d to the list of its keys."""
    inverse = {}
    for key, val in d.items():
        if val in inverse:
            inverse[val].append(key)
        else:
            inverse[val] = [key]
    return inverse
'''
hist = histogram('parrot')
print hist
inverse = invert_dict(hist)
print inverse
'''
'''setdefault(key[, default]) 
If key is in the dictionary, return its value. （这个例子中返回的值为list）
If not, insert key with a value of default and return default. 
default defaults to None.
'''
#Exercise 11-6
# Memo of already computed Fibonacci numbers, seeded with the base cases.
known = {0: 0, 1: 1}


def fibonacci(n):
    """Return the n-th Fibonacci number, caching results in `known`."""
    try:
        return known[n]
    except KeyError:
        pass
    value = fibonacci(n - 1) + fibonacci(n - 2)
    known[n] = value
    return value
#2014-12-24
def fibonacci_origin(n):
    """Return the n-th Fibonacci number by plain recursion, without a memo."""
    if n == 0:
        return 0
    if n == 1:
        return 1
    previous = fibonacci_origin(n - 1)
    before_previous = fibonacci_origin(n - 2)
    return previous + before_previous
'''speed:fibonacci(n) > fibonacci_origin(n)
import time
start = time.clock()
fibonacci(22)
end = time.clock()
print end-start
start_origin = time.clock()
fibonacci_origin(22)
end_origin = time.clock()
print end_origin - start_origin
'''
#Exercise 11-7
def ack(m, n):
    """Return the Ackermann function A(m, n) by direct recursion.

    Only non-negative m and n match one of the three cases below; for any
    other input the function falls off the end and returns None.
    """
    if m == 0:
        return n + 1
    if m > 0 and n == 0:
        return ack(m - 1, 1)
    if m > 0 and n > 0:
        return ack(m - 1, ack(m, n - 1))
#2014-12-25
# Cache of Ackermann values for m > 0 and n > 0, keyed by the tuple (m, n).
ack_res = {}


def ack_memo(m, n):
    """Return the Ackermann function A(m, n), caching results in ack_res.

    Only the doubly recursive case (m > 0 and n > 0) is stored; the two
    simpler cases are computed directly on every call.
    """
    if m == 0:
        return n + 1
    if m > 0 and n == 0:
        return ack_memo(m - 1, 1)
    key = (m, n)
    if key not in ack_res:
        ack_res[key] = ack_memo(m - 1, ack_memo(m, n - 1))
    return ack_res[key]
#Exercise 11-9
def has_duplicates_list(t):
    """Return True if any item occurs more than once in the list t.

    A sorted copy is scanned for equal neighbours; t is not modified.
    """
    ordered = t[:]
    ordered.sort()
    return any(left == right for left, right in zip(ordered, ordered[1:]))
def has_duplicates_dict(t):
    """Return True if any item occurs more than once in t.

    Items are recorded as dict keys, so they must be hashable.  The scan
    stops at the first repeated item.
    """
    seen = dict()
    for element in t:
        if element in seen:
            return True
        seen[element] = True
    return False
#Exercise 11-10
def rotate_letter(letter, n):
    """Shift letter by n places within its 26-letter alphabet, wrapping.

    The case is kept.  Anything that is neither upper- nor lower-case is
    returned unchanged.
    """
    if not (letter.isupper() or letter.islower()):
        return letter
    base = ord('A') if letter.isupper() else ord('a')
    offset = (ord(letter) - base + n) % 26
    return chr(offset + base)
def rotate_word(word, n):
    """Return word with every character passed through rotate_letter(.., n)."""
    return ''.join(rotate_letter(char, n) for char in word)
def make_dict(filename='words.txt'):
    """Return a dict of the words in `filename`, each mapped to itself.

    The file is read one word per line; every word is stripped of
    surrounding whitespace and lower-cased.
    """
    d = dict()
    with open(filename) as fin:  # closed even if reading fails
        for line in fin:
            word = line.strip().lower()
            d[word] = word
    return d
def rotate_pairs(word, word_dict):
    """Print every rotation of word by 1-13 places that is in word_dict.

    Each hit is printed as "word shift rotated".
    """
    for i in range(1, 14):  # shifts 1 through 13
        rotated = rotate_word(word, i)
        if rotated in word_dict:
            # One pre-formatted string prints identically on Python 2 and 3.
            print('%s %s %s' % (word, i, rotated))
def print_pairs():
    """Run rotate_pairs on every word of the dict built by make_dict()."""
    words = make_dict()
    for entry in words:
        rotate_pairs(entry, words)
#Exercise 11-11
from pronounce import read_dictionary
def check_pronounce(a, b, pronounce_dict):
    """Tell whether a and b have equal entries in pronounce_dict.

    Returns False when either word is missing from the dict.
    """
    if a in pronounce_dict and b in pronounce_dict:
        return pronounce_dict[a] == pronounce_dict[b]
    return False
def check_word(word, word_dict, pronounce_dict):
    """Check whether dropping the first or the second letter keeps the sound.

    Returns True only when both shortened forms are in word_dict and
    check_pronounce reports each of them as matching word.
    """
    without_first = word[1:]
    if without_first in word_dict and check_pronounce(
            word, without_first, pronounce_dict):
        without_second = word[0] + word[2:]
        if without_second in word_dict and check_pronounce(
                word, without_second, pronounce_dict):
            return True
    return False
'''
pronounce_dict = read_dictionary()
word_dict = make_dict() #Exercise 11-10
for word in word_dict:
        if check_word(word, word_dict, pronounce_dict):
            print word, word[1:], word[0] + word[2:]
'''

第十二章元组

元组是不可变的，常作为字典的键

DSU模式：

修饰（Decorate）：构建一个元组列表，在序列元素之前放置一个或多个排序键。

排序（Sort）：对这个元组列表进行排序。

去修饰（Undecorate）：从排序后的序列中抽取出原来的元素。

#2014-12-25
#Exercise 12-1
def sumall(*args):
    """Return the sum of all positional arguments (0 when there are none)."""
    return sum(args)
#Exercise 12-2
def my_unstable_sort(words):
    """Return the words ordered by length, shortest first.

    Words are grouped by length and the groups are emitted in ascending
    order of length.  Within a group the words keep their input order.
    """
    by_length = dict()
    for word in words:
        by_length.setdefault(len(word), []).append(word)
    res = []
    # Walk the lengths in sorted order: dict iteration order says nothing
    # about the size of the keys.
    for length in sorted(by_length):
        res.extend(by_length[length])
    return res
import random        
def demo_sort_by_length_random(words):
    """Return the words sorted longest first, ties in random order.

    Each word is decorated with (length, random number, word), the tuples
    are sorted in descending order, and the words are extracted again.
    """
    t = []
    for word in words:
        # random.random must be *called*: a fresh number per word is what
        # shuffles words of equal length.
        t.append((len(word), random.random(), word))
    t.sort(reverse=True)
    return [word for _length, _tiebreak, word in t]
#2014-12-26
#Exercise 12-3
def dict_list(d):
    """Return a list of (value, key) tuples, one for each key of d."""
    return [(d[key], key) for key in d]
def most_frequent(s):
    """Return the distinct items of s, most frequent first.

    Items are counted, turned into (count, item) pairs by dict_list and
    sorted in descending order, so equal counts are ordered by item,
    largest first.
    """
    counts = {}
    for letter in s:
        if letter in counts:
            counts[letter] += 1
        else:
            counts[letter] = 1
    ranked = dict_list(counts)
    ranked.sort(reverse=True)
    return [letter for _count, letter in ranked]
#2014-12-27
#Exercise 12-4
def word_tuple(word):
    """Return the items of word as a tuple in sorted order."""
    return tuple(sorted(word))
def build_dict(filename='words.txt'):
    """Group the words of `filename` by their sorted letters.

    The file is read one word per line; each word is stripped and
    lower-cased.  Returns a dict mapping word_tuple(word) to the list of
    words that share that key, in file order.
    """
    d = dict()
    with open(filename) as fin:  # closed even if reading fails
        for line in fin:
            word = line.strip().lower()
            d.setdefault(word_tuple(word), []).append(word)
    return d
def print_dict(d):
    """Print the values of d, one per line, longest value first.

    Values are paired with their length and sorted in descending order,
    so values of equal length are ordered by their own comparison.
    """
    new_list = [(len(words), words) for words in d.values()]
    new_list.sort(reverse=True)
    for _length, res in new_list:
        # Parenthesised single-argument print works on Python 2 and 3.
        print(res)
def find_bingo():
    """Print the groups from build_dict() whose key has eight letters.

    The selected groups are handed to print_dict for output.
    """
    d = build_dict()
    res = {}
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for key, value in d.items():
        if len(key) == 8:
            res[key] = value
    print_dict(res)
#Exercise 12-5
def check_double_reverse(a, b):
    """Return True if a and b have equal length and differ in exactly two
    positions."""
    if len(a) != len(b):
        return False
    mismatches = sum(1 for left, right in zip(a, b) if left != right)
    return mismatches == 2
def print_reverse():
    """Print pairs of words from the same build_dict() group that pass
    check_double_reverse.

    Each pair is printed once, smaller word first, as "word1 word2".
    """
    d = build_dict()
    # values() exists on both Python 2 and 3; itervalues() is Python 2 only.
    for words in d.values():
        for word1 in words:
            for word2 in words:
                # word1 < word2 reports each unordered pair only once.
                if word1 < word2 and check_double_reverse(word1, word2):
                    print('%s %s' % (word1, word2))
#Exercise 12-6
def word_2_sub_list(word):
    """Return every string obtained by deleting one character from word.

    Entry i of the result is word without its i-th character.
    """
    return [word[:pos] + word[pos + 1:] for pos in range(len(word))]
def check_reduce(word, d):
    """Return True if word can be shrunk to the empty string through d.

    One letter is removed at a time (see word_2_sub_list); every
    intermediate string must be a key of d.  All possible removals are
    tried, not only the first one.  Returns False for the empty string.
    """
    for child in word_2_sub_list(word):
        if child == '':
            return True  # word is a single letter: nothing left to check
        if child in d and check_reduce(child, d):
            return True
    return False
def find_reduce(filename='words.txt'):
    """Print every word of `filename` accepted by check_reduce.

    The file is read one word per line, stripped and lower-cased.  Each
    hit is printed as a (length, word) tuple, longest first.
    """
    with open(filename) as fin:  # closed even if reading fails
        d = dict.fromkeys((line.strip().lower() for line in fin), '')
    res = [(len(key), key) for key in d if check_reduce(key, d)]
    res.sort(reverse=True)
    for element in res:
        # Parenthesised single-argument print works on Python 2 and 3.
        print(element)

第十三章选择数据结构

#2014-12-31
#Exercise 13-1
import string
def process_file(filename):
    """Return a word histogram for the text file `filename`.

    Every line is passed to process_line, which updates the histogram.
    """
    hist = dict()
    with open(filename) as fin:  # closed even if reading fails
        for line in fin:
            process_line(line, hist)
    return hist
def process_line(line, hist):
    """Add the words of line to the histogram hist (modified in place).

    Hyphens are treated as word separators.  Each word is stripped of
    leading and trailing punctuation and whitespace and lower-cased;
    tokens with nothing left after stripping are skipped.
    """
    # Replace hyphens with a space, not '': deleting them would fuse
    # "one--two" into the single word "onetwo".
    line = line.replace('-', ' ')
    strippable = string.punctuation + string.whitespace
    for word in line.split():
        word = word.strip(strippable).lower()
        if word:
            hist[word] = hist.get(word, 0) + 1
#Exercise 13-2
def total_words(hist):
    """Return the sum of all counts stored in the histogram hist."""
    total = 0
    for count in hist.values():
        total += count
    return total
def different_words(hist):
    """Return the number of entries (distinct words) in the histogram hist."""
    distinct = len(hist)
    return distinct
def most_freq(hist):
    """Return (frequency, word) tuples from hist in descending order."""
    pairs = ((freq, word) for word, freq in hist.items())
    return sorted(pairs, reverse=True)
#Exercise 13-3
'''
hist = process_file('emma.txt')
t = most_freq(hist)
print t[0:20]
'''
#Exercise 13-4
def subtract(d1, d2):
    """Return a dict of the keys of d1 that are not in d2.

    Every value in the result is None.
    """
    return {key: None for key in d1 if key not in d2}
#Exercise 13-5
import random
def histogram(s):
    """Build a dict that maps each item of s to its number of occurrences."""
    freq = {}
    for item in s:
        freq.setdefault(item, 0)
        freq[item] += 1
    return freq
def choose_from_hist(hist):
    """Pick a random key of hist with probability proportional to its count.

    Each key is repeated as many times as its count in a list, and
    random.choice selects one entry from that list.
    """
    pool = [word for word, freq in hist.items() for _ in range(freq)]
    return random.choice(pool)
#Exercise 13-6
def subtract_set(d1, d2):
    """Return the set of keys that are in d1 but not in d2."""
    only_in_first = set(d1)
    only_in_first -= set(d2)
    return only_in_first
#Exercise 13-7
import random
from bisect import bisect
def list_sum(t): #10-3
    """Return the cumulative sums of t.

    Element i of the result is t[0] + ... + t[i].  A running total is
    carried along so each element is added exactly once.
    """
    res = []
    total = 0
    for value in t:
        total += value
        res.append(total)
    return res
def random_word(hist):
    """Pick a random key of hist with probability proportional to its count.

    The cumulative sums of the counts are built, a random integer below
    the total is drawn, and bisection on the cumulative sums finds the
    key whose range contains that integer.

    Raises IndexError when hist is empty.
    """
    words = []
    cumulative = []
    running = 0
    for word, freq in hist.items():
        running += freq
        words.append(word)
        cumulative.append(running)
    if not cumulative:
        raise IndexError('cannot choose from an empty histogram')
    x = random.randint(0, cumulative[-1] - 1)
    # Search the *cumulative* sums: entry i is the first total greater
    # than x exactly when x falls into the range belonging to word i.
    index = bisect(cumulative, x)
    return words[index]
#2015-01-02
#Exercise 13-8
#Can not get it done

总结

这一块还是蛮复杂的，列表、字典和元组之间的区别与联系，交叉使用，如何根据需求选择合适的数据类型，需要多练习。