diff --git a/.gitignore b/.gitignore index 37fc9d4..00f0cc6 100644 --- a/.gitignore +++ b/.gitignore @@ -88,3 +88,7 @@ ENV/ # Rope project settings .ropeproject +# celery-beat +*.bak +*.dat +*.dir diff --git a/Algorithm/README.md b/Algorithm/README.md new file mode 100644 index 0000000..ecff713 --- /dev/null +++ b/Algorithm/README.md @@ -0,0 +1,6 @@ +1. [约瑟夫环](https://github.com/lambdaplus/python/blob/master/Algorithm/joseph-ring.py) +2. [寻找两个链表的第一个交点](https://www.zybuluo.com/Scrazy/note/719335) +3. [删除链表中的重复元素](https://www.zybuluo.com/Scrazy/note/720542) +4. [数组中的数组成最小值](https://www.zybuluo.com/Scrazy/note/720582) +5. [索引为index 的丑数](https://www.zybuluo.com/Scrazy/note/720587) +6. [反转链表](https://www.zybuluo.com/Scrazy/note/721436) diff --git a/Algorithm/arry.md b/Algorithm/arry.md deleted file mode 100644 index bd91a35..0000000 --- a/Algorithm/arry.md +++ /dev/null @@ -1,30 +0,0 @@ -# 题目 -调整数组顺序使奇数位于偶数前面 -## 描述 -输入一个整数数组,实现一个函数来调整该数组中数字的顺序,使得所有的奇数位于数组的前半部分,所有的偶数位于位于数组的后半部分,并保证奇数和奇数,偶数和偶数之间的相对位置不变。 - -马上熄灯啦,直接贴渣渣代码!! -```python -# -*- coding: utf-8 -*- -import random - - -def foo(L): - L1 = [] - L2 = [] - for i in L: - if i % 2 != 0: - L1.append(i) - else: - L2.append(i) - - return L1 + L2 - -if __name__ == '__main__': - L = [random.randrange(100) for _ in range(10)] - print(L) - print(foo(L)) - -``` -代码很渣,有空继续优化!! -就酱。。。洗裤子去啦。。。。。。。。 diff --git a/Algorithm/binary_search.md b/Algorithm/binary_search.md index 4a9ab49..9418fb7 100644 --- a/Algorithm/binary_search.md +++ b/Algorithm/binary_search.md @@ -1,5 +1,4 @@ -# 二分法查找 - +# 二分法查找 二分法查找,顾名思义,二分、二分就是分成两半呗。(有的翻译是`折半法搜索`比如SICP里翻译的就是`折半法搜索`)。它的复杂度为O(logn),在列表(已排序)中对给定值`value`进行查找并输出其索引(index)值。 @@ -9,15 +8,14 @@ def binary_search(lst, value): left, right = 0, len(lst) - 1 - + while left <= right: - middle = int((left + right) / 2) # 取`lst`中值索引 - + middle = (left + right) // 2 # 取`lst`中值索引 + if value > lst[middle]: left = middle + 1 # value大于`lst`中值,让左边界等于 middle + 1 elif value < lst[middle]: right = middle - 1 # 类似上 - else: return "The value's index is {}".format(middle) return "There is no {}".format(value) @@ -26,16 +24,16 @@ if __name__ == '__main__': lst = [1, 3, 5, 7, 9] value = int(input("Please input the value(1-10): ")) print(binary_search(lst, value)) - ``` 再来个递归(recursion)版的吧, 不作过多解释啦! 
+ ```python # -*- coding: utf-8 -*- def binary_search_rec(lst, value, left, right): - middle = int((left + right) / 2) + middle = (left + right) // 2 if left > right: return "I'm sorry, there is no {}".format(value) @@ -44,7 +42,7 @@ def binary_search_rec(lst, value, left, right): return binary_search_rec(lst, value, left, middle - 1) elif value > lst[middle]: - return binary_search_rec(lst, value, middle + 1, right) + return binary_search_rec(lst, value, middle + 1, right) else: return "Congratulations, the value's({}) index is {}".format(value, middle) diff --git a/Algorithm/joseph-ring.py b/Algorithm/joseph-ring.py new file mode 100644 index 0000000..46dd733 --- /dev/null +++ b/Algorithm/joseph-ring.py @@ -0,0 +1,16 @@ +# coding: utf-8 +# %load python/Algorithm/joseph-ring.py +def joseph_ring(n, m): + if n < 1: + return -1 + + result = -1 + start = 0 + + ring_num = list(range(n)) + while ring_num: + k = (start + m - 1) % n + result = ring_num.pop(k) + n -= 1 + start = k + return result diff --git a/Algorithm/merge-two-list.md b/Algorithm/merge-two-list.md index 7453e67..acb55eb 100644 --- a/Algorithm/merge-two-list.md +++ b/Algorithm/merge-two-list.md @@ -8,22 +8,19 @@ from random import randrange def merge_two_list(lst1, lst2): + rst = [] while lst1 and lst2: - lst3.append(lst1.pop(0) if lst1[0] <= lst2[0] else lst2.pop(0)) - - while lst1: - lst3.append(lst1.pop(0)) - - while lst2: - lst3.append(lst2.pop(0)) - - return lst3 + if lst1[-1] > lst2[-1]: + rst.append(lst1.pop()) + else: + rst.append(lst2.pop()) + rst.reverse() + return (lst1 or lst2) + rst if __name__ == "__main__": - lst3 = [] lst1 = sorted([randrange(100) for _ in range(10)]) - print(lst1) + print('lst1 is : ', lst1) lst2 = sorted([randrange(100) for _ in range(6)]) - print(lst2) - print(merge_two_list(lst1, lst2)) + print('lst2 is : ', lst2) + print('The merged list is\n', merge_two_list(lst1, lst2)) ``` diff --git a/Algorithm/reverse_print_link_list.py b/Algorithm/reverse_print_link_list.py new file mode 100644 index 0000000..0a270bc --- /dev/null +++ b/Algorithm/reverse_print_link_list.py @@ -0,0 +1,18 @@ +# -*- coding:utf-8 -*- +class ListNode: + def __init__(self, x): + self.val = x + self.next = None + +class Solution: + # 返回从尾部到头部的列表值序列,例如[1,2,3] + def printListFromTailToHead(self, listNode): + # write code here + res = [] + if listNode is None: + return res + while listNode: + res.append(listNode.val) + listNode = listNode.next + res.reverse() + return res diff --git a/Algorithm/sort/bubble-sort.py b/Algorithm/sort/bubble-sort.py index db27018..d91f215 100644 --- a/Algorithm/sort/bubble-sort.py +++ b/Algorithm/sort/bubble-sort.py @@ -7,7 +7,7 @@ def bubble_sort(L): 冒泡排序主要使用两次循环实现排序。 外循环中的一个数字依次与内层循环中的每个数字进行比较,如果索引值小的数字大于索引值大的数字,交换位置。否则,位置不变。直至外循环结束。 ''' - if len(L) < 2: # 列表内元素低于2,直接返回! + if len(L) < 2: # 列表内元素低于2,直接返回! 
return L for i in range(len(L)): for j in range(1, len(L)): @@ -15,6 +15,7 @@ def bubble_sort(L): L[j - 1], L[j] = L[j], L[j - 1] return L + if __name__ == '__main__': L = [randrange(1000) for _ in range(10)] print(bubble_sort(L)) diff --git a/Algorithm/sort/insert_sort.py b/Algorithm/sort/insert_sort.py index 29681f6..27b69f0 100644 --- a/Algorithm/sort/insert_sort.py +++ b/Algorithm/sort/insert_sort.py @@ -1,9 +1,12 @@ # coding=utf-8 + from random import randrange + def insert_sort(L): if len(L) < 2: return L + for i in range(1, len(L)): tmp = L[i] j = i - 1 @@ -14,6 +17,6 @@ def insert_sort(L): return L -#if __name__ == '__main__': +# if __name__ == '__main__': # L = [randrange(1000) for _ in range(10)] # print(insert_sort(L)) diff --git a/Algorithm/sort/merge/merge-sort.py b/Algorithm/sort/merge/merge-sort.py index b6045ad..d2947fb 100644 --- a/Algorithm/sort/merge/merge-sort.py +++ b/Algorithm/sort/merge/merge-sort.py @@ -1,6 +1,7 @@ # -*-coding: utf-8 -*- from random import randrange + def merge_sort(L): if len(L) < 2: return L @@ -9,21 +10,17 @@ def merge(left, right): merged = [] while left and right: - merged.append(left.pop(0) if left[0] <= right[0] else right.pop(0)) - - while left: - merged.append(left.pop(0)) + merged.append(left.pop(0) if left[0] <= right[0] + else right.pop(0)) - while right: - merged.append(right.pop(0)) - - return merged + return merged + (left or right) - mid = int(len(L)/2) + mid = len(L) // 2 left = merge_sort(L[:mid]) right = merge_sort(L[mid:]) return merge(left, right) + if __name__ == "__main__": L = [randrange(1000) for _ in range(10)] print(merge_sort(L)) diff --git a/Algorithm/sort/merge/merge-sort1.py b/Algorithm/sort/merge/merge-sort1.py index c223c61..bd82a5b 100644 --- a/Algorithm/sort/merge/merge-sort1.py +++ b/Algorithm/sort/merge/merge-sort1.py @@ -7,29 +7,32 @@ merge(*iterables, key=None, reverse=False) Merge multiple sorted inputs into a single sorted output. - + Similar to sorted(itertools.chain(*iterables)) but returns a generator, does not pull the data into memory all at once, and assumes that each of the input streams is already sorted (smallest to largest). - + >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25])) [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25] - + If *key* is not None, applies a key function to each element to determine its sort order. 
-    
+
     >>> list(merge(['dog', 'horse'], ['cat', 'fish', 'kangaroo'], key=len))
     ['dog', 'cat', 'fish', 'horse', 'kangaroo']
 '''
+
+
 def merge_sort(L):
     if len(L) < 2:
         return L
-    mid = int(len(L)/2)
+    mid = len(L) // 2
     left = merge_sort(L[:mid])
     right = merge_sort(L[mid:])
     return list(merge(left, right))
+
 if __name__ == "__main__":
     L = [randrange(100) for _ in range(10)]
     print(merge_sort(L))
diff --git a/Algorithm/sort/merge/merge-sort2.py b/Algorithm/sort/merge/merge-sort2.py
new file mode 100644
index 0000000..0fbc286
--- /dev/null
+++ b/Algorithm/sort/merge/merge-sort2.py
@@ -0,0 +1,26 @@
+# coding=utf-8
+# 性能更好
+from random import randrange
+
+
+def merge_sort(seq):
+    mid = len(seq) // 2
+    lft, rgt = seq[:mid], seq[mid:]
+    if len(lft) > 1:
+        lft = merge_sort(lft)
+    if len(rgt) > 1:
+        rgt = merge_sort(rgt)
+
+    res = []
+    while lft and rgt:
+        if lft[-1] >= rgt[-1]:  # 取 lft 和 rgt 尾部较大的值
+            res.append(lft.pop())
+        else:
+            res.append(rgt.pop())
+    res.reverse()  # 反序一下
+    return (lft or rgt) + res
+
+
+if __name__ == '__main__':
+    seq = [randrange(100) for _ in range(10)]
+    print(merge_sort(seq))
diff --git a/Algorithm/sort/quick-sort.py b/Algorithm/sort/quick-sort.py
index 67fd1f2..2efc0c3 100644
--- a/Algorithm/sort/quick-sort.py
+++ b/Algorithm/sort/quick-sort.py
@@ -1,15 +1,19 @@
 # coding=utf-8
 import random
 
+
 def quick_sort(seq):
     if len(seq) < 2:
         return seq
 
     mid = random.choice(seq)
     small = [x for x in seq if x < mid]
+    equal = [x for x in seq if x == mid]  # 与基准相等的元素单独收集,重复值既不丢失也不重复
     big = [x for x in seq if x > mid]
 
-    return quick_sort(small) + [mid] + quick_sort(big)
+    return quick_sort(small) + equal + quick_sort(big)
 
+
 if __name__ == '__main__':
     L = [random.randrange(1000) for _ in range(10)]
     print(quick_sort(L))
diff --git a/Algorithm/sort/select-sort.py b/Algorithm/sort/select-sort.py
new file mode 100644
index 0000000..fe2e67c
--- /dev/null
+++ b/Algorithm/sort/select-sort.py
@@ -0,0 +1,21 @@
+# coding=utf-8
+from random import randrange
+
+
+def select_sort(seq):
+    if len(seq) < 2:
+        return seq
+
+    for i in range(len(seq)-1, 0, -1):
+        max_j = i
+        for j in range(i):
+            if seq[j] > seq[max_j]:
+                max_j = j
+        seq[i], seq[max_j] = seq[max_j], seq[i]
+
+    return seq
+
+
+if __name__ == '__main__':
+    seq = [randrange(100) for _ in range(10)]
+    print(select_sort(seq))
diff --git a/Algorithm/sort/shell-sort.py b/Algorithm/sort/shell-sort.py
new file mode 100644
index 0000000..7f35d1d
--- /dev/null
+++ b/Algorithm/sort/shell-sort.py
@@ -0,0 +1,18 @@
+# coding=utf-8
+def shell_sort(seq):
+    if len(seq) < 2:
+        return seq
+
+    n = len(seq)
+    mid = n // 2
+    while mid > 0:
+        for i in range(mid, n):
+            tmp = seq[i]
+            j = i
+            while j >= mid and seq[j-mid] > tmp:
+                seq[j] = seq[j-mid]
+                j -= mid
+            seq[j] = tmp
+        mid = mid // 2
+    return seq
+
diff --git a/README.md b/README.md
index ce619f3..045d46e 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,16 @@
 # Python
-python练习中的一些代码,以防重装系统再次丢失
 1. [几种排序算法](https://github.com/lambdaplus/python/tree/master/Algorithm/sort)
 2. [翻转列表](https://github.com/lambdaplus/python/blob/master/resver.md)
 3. [二分法查找](https://github.com/lambdaplus/python/blob/master/Algorithm/binary_search.md)
+4. [一个异步爬虫](https://github.com/lambdaplus/python/blob/master/%E4%B8%80%E4%B8%AA%E5%BC%82%E6%AD%A5%E7%88%AC%E8%99%AB.md)
+5. [豆瓣电影Top250爬虫](https://github.com/lambdaplus/python/blob/master/%E8%B1%86%E7%93%A3%E7%94%B5%E5%BD%B1Top250%20%E7%88%AC%E8%99%AB.md)
+6. [装饰器](https://www.zybuluo.com/Scrazy/note/551565)
+7. [二叉树](https://www.zybuluo.com/Scrazy/note/390264)
+8. [Celery](https://www.zybuluo.com/Scrazy/note/697561)
+9. 
[RabbitMQ](https://www.zybuluo.com/Scrazy/note/699512) +10. [Python与数据库](https://www.zybuluo.com/Scrazy/note/702005) +11. [新浪博客抓取及简单聚类](https://www.zybuluo.com/mdeditor#709348) +12. [Python 算法教程 笔记](https://www.zybuluo.com/Scrazy/note/530998) +13. [TCP简述](https://www.zybuluo.com/Scrazy/note/717147) +14. [单例模式](https://www.zybuluo.com/Scrazy/note/719017) diff --git a/binary_search.md b/binary_search.md new file mode 100644 index 0000000..7e7034b --- /dev/null +++ b/binary_search.md @@ -0,0 +1,136 @@ + +--- +title: 二分法查找 +date: 2016-09-23 15:32:24 +tags: Algorithm +--- + +二分法查找,顾名思义,二分、二分就是分成两半呗。(有的翻译是`折半法搜索`比如SICP里翻译的就是`折半法搜索`)。它的复杂度为O(logn),在列表(已排序)中对给定值`value`进行查找并输出其索引(index)值。 + +```python +# -*- coding: utf-8 -*- + + +def binary_search(lst, value): + left, right = 0, len(lst) - 1 + + while left <= right: + middle = int((left + right) / 2) # 取`lst`中值索引 + + if value > lst[middle]: + left = middle + 1 # value大于`lst`中值,让左边界等于 middle + 1 + elif value < lst[middle]: + right = middle - 1 # 类似上 + + else: + return "The value's index is {}".format(middle) + return "There is no {}".format(value) + +if __name__ == '__main__': + lst = [1, 3, 5, 7, 9] + value = int(input("Please input the value(1-10): ")) + print(binary_search(lst, value)) + +``` + +再来个递归(recursion)版的吧, 不作过多解释啦! +```python +# -*- coding: utf-8 -*- + + +def binary_search_rec(lst, value, left, right): + middle = int((left + right) / 2) + + if left > right: + return "I'm sorry, there is no {}".format(value) + + if value < lst[middle]: + return binary_search_rec(lst, value, left, middle - 1) + + elif value > lst[middle]: + return binary_search_rec(lst, value, middle + 1, right) + + else: + return "Congratulations, the value's({}) index is {}".format(value, middle) + + +if __name__ == '__main__': + lst = [1, 3, 5, 7, 9] + left, right = 0, len(lst) + value = int(input("Please input the value: ")) + print(binary_search_rec(lst, value, left, right)) +``` +没事。温习以下二分搜索! + +被拼写错误折磨了一晚上。好好的lft被我写成ltf。debug生无可恋! +```python +from random import randrange +def binary_search(seq, sit, lft, rgt): + mid = (lft + rgt) // 2 + + if lft > rgt: + return 'The seq no {}'.format(sit) + + if sit > seq[mid]: + return binary_search(seq, sit, mid+1, rgt) + + elif sit < seq[mid]: + return binary_search(seq, sit, lft, mid-1) + + else: + return 'The {} in the seq and the station is {}'.format(sit, mid) + +if __name__ == '__main__': + seq = [1, 4, 6, 8, 9, 12, 44, 56] + lft, rgt = 0, len(seq) + print(binary_search(seq, 4, lft, rgt)) +``` +昨天面试,面试官出了一道算法题: + +> 有一个数组,其内元素先递增后递减,请找出其中的最大值. + +对于我来说,当时第一个想起来的是,排序但是转念间就知道肯定不是最好的啦.于是就在哪儿想啊想,还是想不起来.气氛挺尴尬的,外面也挺冷的(电话面试,外面安静).我想不起来,面试小哥也不急着催我,最后也算是在小哥的提示下,想起了怎么做啦!(太感谢小哥啦, 小哥好人! 喂, 你们几个不许笑啊喂!) + +当然是**二分**啦,下面是算法实现! 
+ +```python +# coding=utf-8 +def search_max_num(seq, left, right): + mid = (right + left) // 2 + if left > right: + return seq[mid] + if seq[mid] > seq[mid - 1]: + return search_max_num(seq, mid + 1, right) + else: + return search_max_num(seq, left, mid - 1) +if __name__ == "__main__": + seq = [32, 55, 54, 54, 54, 54, 32, 15, 6, 4, 2, 1] + print(search_max_num(seq, 0, len(seq))) +``` +### 二维数组的查找 +class Solution: + # array 二维列表 + + def find(self, target, array): + # write code here + for arr in array: + lft, rgt =0, len(arr) - 1 + while lft <= rgt: + mid = (lft + rgt) // 2 + if target > arr[mid]: + lft = mid + 1 + elif target < arr[mid]: + rgt = mid - 1 + else: + return arr[mid] + return 'No target' + + +target = 8 +array = [ + [1, 3, 5, 7, 9], + [2, 4, 6, 8, 10] +] +solution = Solution() +solution.find(target, array) +``` diff --git a/binary_tree.md b/binary_tree.md new file mode 100644 index 0000000..9aa4289 --- /dev/null +++ b/binary_tree.md @@ -0,0 +1,354 @@ +--- +更新 `2017-03-22` +--- + +畏惧了好久的二叉树,终于在近两天开搞了。二分法查找已在前几天完成,磨刀霍霍向猪羊,吼吼吼! 何为二叉树?按照我目前的理解就是类似于发叉的树,树干上发两个叉或者一个(不发叉的树真不到有何用处),发叉的地方称为**节点**。然后发的两个叉又可以继续像树干一样发叉,新发的叉有可以继续发叉,子又生子,孙又生孙,无穷尽也!但是**树的左边的叉的值小于节点值,右边的大于节点值**。 + +本文参考: [老齐的Github](https://github.com/qiwsir/algorithm/blob/master/binary_tree.md) + +首先,建立一棵树。 + +```python +class Node: + def __init__(self, data): + self.left = None + self.right = None + self.data = data +``` + +这样,光秃秃的小树苗就种好了。接着就是茁长生长啦。浇水去喽! + +```python +class Node: + ''' + ... + ''' + def insert(self, data): + if data < self.data: # 树叉小于节点 + if self.left is None: # 并且左面的树叉为空 + self.left = Node(data) # 当仁不让的插入 + else: # 非空的话 + self.left.insert(data) # 以左树叉为节点继续插入 + + elif data > self.data: + if self.right is None: + self.right = Node(data) + else: + self.right.insert(data) + else: + self.data = data +``` + +浇完水后,小树苗噌噌的往上窜啊。 + +```python +class Node: + ''' + 省略上述代码 + ''' + def search(self, data, parent=None): + ''' + data为目标查询值,同时返回parent(父节点)便于定位。 + ''' + if data < self.data: + if self.left is None: + return None, None + else: + return self.left.search(data, self) + + elif data > self.data: + if self.right is None: + return None, None + + return self.right.search(data, self) + else: + # return self.data, parent.data + return self, parent +``` + +树苗生长的那么好,想看看每个叉上都是啥呀,来来来,抬头往上看((其实是往下看啦)。 + +```python +def print_tree(self): + if self.left: + self.left.print_tree() + print(self.data) + if self.right: + self.right.print_tree() +``` + +树的遍历又分为以下三种: + +1. 前序(root -> left -> right) +2. 中序(left -> root -> right) +3. 后序(left -> right -> root) + +调整`print_tree`函数里 `print(self.data)` 的顺序即可实现三种遍历方式。 + +转眼间小树苗涨的太旺盛了,疯涨啊!!怎么办呢,剪几个枝吧。别怪我哦,小树苗! 删除节点时,有三种可能的情况: + +1. 目标节点下没有任何节点(0个) +2. 目标节点下有一个节点 +3. 目标节点下有两个节点 + +判断节点数目程序如下: + +```python +class Node: +''' +省略代码 +''' +def chrildren(self): + count = 0 + if self.left: + count += 1 + + if self.right: + count += 1 + + return count +``` + +接下来就是删除操作啦。哦吼吼。 + +```python +class Node: +''' +省略 +''' + +def delete(self, data): + node, parent = self.search(data) + chrildren = node.chrildren() # 子节点数目 + if chrildren == 0: # 情况 1, 没有子节点,直接删除即可 + if parent.left is node: # 判断目标节点是其父节点的 左or右 节点 + parent.left = None + else: + parent.right = None + del node + + elif chrildren == 1: # 情况 2, 有一个子节点,用子节点替换其即可 + if node.left: + tmp = node.left + else: + tmp = node.right + if parent: + if parent.left is node: + parent.left = tmp + else: + parent.right = tmp + del node + else: + ''' + 第三种情况比较复杂: + 1\. 左节点0个子节点 + 2\. 左节点1个子节点 + 3\. 
左节点2个子节点 + ''' + parent = node + successor = node.right + while successor.left: # 递归思想,直至找到'最左'的子节点, 保持树的平衡,用右子节点的值替换 + parent = successor + successor = successor.left + node.data = successor.data + if parent.left == successor: + parent.left = successor.right + else: + parent.right = successor.right + +# 接下来可以测试以下种的树怎么样啦。 + +root = Node(11) root.insert(14) root.insert(9) root.insert(9) root.insert(7) root.insert(10) root.insert(4) root.insert(5) root.insert(6) root.insert(8) value, parent = root.search(10) print(value.data, parent.data) root.print_tree() print('_'_ 20) root.delete(4) root.print_tree() + +``` +把自己理解的部分写了写。当做练习,就先当个α版吧。 +`2016-05-28` + + +基本搞明白了 +完整代码[在这里](https://github.com/lambdaplus/python/blob/master/binary_tree.py) + +### 广度遍历和深度遍历二叉树! + +```python +def lookup(root): + stack = [root] + while stack: + current = stack.pop() + print(current.data) + if current.left: + stack.append(current.left) + if current.right: + stack.append(current.right) + + +def deep(root): + if not root: + return + deep(root.left) + deep(root.right) + print(root.data) +``` +### 求最大树深 + +```python +# -*- coding:utf-8 -*- +class TreeNode: + def __init__(self, x): + self.val = x + self.left = None + self.right = None + +class Solution: + def TreeDepth(self, pRoot): + if not pRoot: + return 0 + return max(self.TreeDepth(pRoot.left), self.TreeDepth(pRoot.right)) + 1 +``` + +### 比较两棵树是否相同 + +```python +def is_same(t1, t2): + if t1 == None and t2 == None: + return True + elif t1 and t2: + return t1.data == t2.data and is_same(t1.left, t2.left)\ + and is_same(t1.right, t2.right) + else: + return False +``` + +### 已知前序中序求后序 + +前面说到: +前序: root -> left -> right +中序: left -> root -> right +后序: left -> right -> root + +前序: 第一个值 A 即为根节点 +中序: A 的左边全为左子树,右边全是右子树 + +```python +def pre_in_post(pre_order, in_order): + if not pre_order: + return + post = Node(pre_order[0]) + index = in_order.index(pre_order[0]) + post.left = pre_in_post(pre_order[1:index+1], in_order[:index]) + post.right = pre_in_post(pre_order[index+1:], in_order[index+1:]) + return post +``` +### 已知前序中序构造出树 +```python +# -*- coding:utf-8 -*- +class TreeNode: + def __init__(self, x): + self.val = x + self.left = None + self.right = None + +class Solution: + # 返回构造的TreeNode根节点 + def reConstructBinaryTree(self, pre, tin): + # write code here + if not pre: + return + tree = TreeNode(pre[0]) + index = tin.index(pre[0]) + tree.left = self.reConstructBinaryTree(pre[1:index+1],tin[:index]) + tree.right = self.reConstructBinaryTree(pre[index+1:],tin[index+1:]) + return tree + + @classmethod + def print_tree(cls, tree): + if tree: + cls.print_tree(tree.left) + cls.print_tree(tree.right) + print(tree.val) + +pre = [1,2,3,4,5,6,7] +tin = [3,2,4,1,6,5,7] +s = Solution() +t = s.reConstructBinaryTree(pre, tin) +s.print_tree(t) +``` +### 树的子结构 + +```python +求pRoot2 的子树是否为 pRoot2 +# -*- coding:utf-8 -*- +# class TreeNode: +# def __init__(self, x): +# self.val = x +# self.left = None +# self.right = None +class Solution: + def is_subtree(self, t1, t2): + if not t2: # t2 is None 其为子树 + return True + if not t1: + return False + if not t1.val == t2.val: + return False + return self.is_subtree(t1.left, t2.left) and self.is_subtree(t1.right, t2.right) + + def HasSubtree(self, pRoot1, pRoot2): + # write code here + result = False + if pRoot1 and pRoot2: + if pRoot1.val == pRoot2.val: + result = self.is_subtree(pRoot1, pRoot2) + if not result: + result = self.is_subtree(pRoot1.left, pRoot2) + if not result: + result = self.is_subtree(pRoot1.right, pRoot2) + return result 
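+
+# 用法示意(假设 t1、t2 是已构造好的 TreeNode 根节点,仅为演示补充):
+#   Solution().HasSubtree(t1, t2)  # 返回 t2 是否为 t1 的子结构
+# 按题目约定,空树不是任何树的子结构,所以入口处要求 pRoot1 和 pRoot2 均非空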
+``` +### 对称二叉树 + +``` +# -*- coding:utf-8 -*- +# class TreeNode: +# def __init__(self, x): +# self.val = x +# self.left = None +# self.right = None +class Solution: + + def isSymmetrical(self, pRoot): + def is_same(p1, p2): + if not (p1 or p2): + return True + elif p1 and p2 and p1.val == p2.val: + return is_same(p1.left, p2.right) and is_same(p1.right, p2.left) + return False + + if not pRoot: + return True + return is_same(pRoot.left, pRoot.right) +``` +### 二叉树镜像 + +``` +# -*- coding:utf-8 -*- +# class TreeNode: +# def __init__(self, x): +# self.val = x +# self.left = None +# self.right = None +class Solution: + # 返回镜像树的根节点 + def Mirror(self, root): + # write code here + if not root: + return None + elif not (root.left or root.right): + return root + + root.left, root.right = root.right, root.left + if root.left: + self.Mirror(root.left) + if root.right: + self.Mirror(root.right) +``` diff --git a/binary_tree.py b/binary_tree.py new file mode 100644 index 0000000..876a35b --- /dev/null +++ b/binary_tree.py @@ -0,0 +1,128 @@ +class Node: + + def __init__(self, data): + self.left = None + self.right = None + self.data = data + + def insert(self, data): + if data < self.data: # 树叉小于节点 + if self.left is None: # 并且左面的树叉为空 + self.left = Node(data) # 当仁不让的插入 + else: # 非空的话 + self.left.insert(data) # 以左树叉为节点继续插入 + + elif data > self.data: + if self.right is None: + self.right = Node(data) + else: + self.right.insert(data) + else: + self.data = data + + def search(self, data, parent=None): + ''' + data为目标查询值,同时返回parent(父节点)便于定位。 + ''' + if data < self.data: + if self.left is None: + return None, None + else: + return self.left.search(data, self) + + elif data > self.data: + if self.right is None: + return None, None + + return self.right.search(data, self) + else: + # return self.data, parent.data + return self, parent + + def print_tree_in(self): # 中序 + if self.left: + self.left.print_tree_in() + print(self.data) + if self.right: + self.right.print_tree_in() + + def print_tree_pre(self): # 前序 + print(self.data) + if self.left: + self.left.print_tree_pre() + if self.right: + self.right.print_tree_pre() + + def print_tree_post(self): # 后序 + if self.left: + self.left.print_tree_post() + if self.right: + self.right.print_tree_post() + print(self.data) + + def chrildren(self): + count = 0 + if self.left: + count += 1 + + if self.right: + count += 1 + + return count + + def delete(self, data): + node, parent = self.search(data) + chrildren = node.chrildren() # 子节点数目 + if chrildren == 0: # 情况 1, 没有子节点,直接删除即可 + if parent.left is node: # 判断目标节点是其父节点的 左or右 节点 + parent.left = None + else: + parent.right = None + del node + + elif chrildren == 1: # 情况 2, 有一个子节点,用子节点替换其即可 + if node.left: + tmp = node.left + else: + tmp = node.right + if parent: + if parent.left is node: + parent.left = tmp + else: + parent.right = tmp + del node + else: + ''' + 第三种情况比较复杂: + 1. 左节点0个子节点 + 2. 左节点1个子节点 + 3. 
左节点2个子节点 + ''' + parent = node + successor = node.right + while successor.left: # 递归思想,直至找到最左的子节点, 保持树的平衡,用右子节点的值替换 + parent = successor + successor = successor.left + node.data = successor.data + if parent.left == successor: + parent.left = successor.right + else: + parent.right = successor.right + +# 接下来可以测试以下种的树怎么样啦。 +root = Node(11) +root.insert(14) +root.insert(9) +root.insert(9) +root.insert(7) +root.insert(10) +root.insert(4) +root.insert(5) +root.insert(6) +root.insert(8) +value, parent = root.search(10) +print(value.data, parent.data) +root.print_tree_in() +print('*' * 20) +root.delete(4) +root.print_tree_in() diff --git a/category_all.py b/category_all.py new file mode 100644 index 0000000..ce03fd4 --- /dev/null +++ b/category_all.py @@ -0,0 +1,62 @@ +# coding: utf-8 +import re +import concurrent.futures +import requests +from bs4 import BeautifulSoup as bs +from pymongo import MongoClient + + +def fetch(url): + res = requests.get(url) + res.encoding = 'gbk' + content = bs(res.text, 'lxml') + return content + + +def base_info(html): + pattern = re.compile(r'http://blog.sina.com.cn/s/blog_.*\.html') + links = re.findall(pattern, str(html)) + date_ = re.findall(r'\((\d{2,}.*)\)', str(html)) + tle_auth = html.select('li') + authes = (auth.text.split(' ')[0] for auth in tle_auth) + titles = (title.text.split(' ')[-1] for title in tle_auth) + for infos in zip(links, titles, authes, date_): + yield infos + + +def save(url): + html = fetch(url) + data = base_info(html) + client = MongoClient('localhost', 27017) + db = client.infos + coll = db.coll + for num, d in enumerate(data, 1): + datum = { + 'links': d[0], + 'title': d[1], + 'auther': d[2], + 'date': d[3] + } + + count = coll.find({'links': d[0]}).count() + if count == 0: + coll.insert_one(datum) + print('{} is grabbed'.format(url)) + + +if __name__ == '__main__': + url = 'http://roll.blog.sina.com.cn/list/other/index_{}.shtml' + + start = int(input('请输入开始页数, 默认为1 >> ')) + if not start: + start = 1 + + end = int(input('输入结束页数, 默认为100 >> ')) + if not end: + end = 100 + + pages = range(start, end + 1) + urls = [url.format(page) for page in pages] + + with concurrent.futures.ProcessPoolExecutor(max_workers=6) as executor: + executor.map(save, urls) diff --git a/category_test.py b/category_test.py new file mode 100644 index 0000000..af8a95e --- /dev/null +++ b/category_test.py @@ -0,0 +1,72 @@ +# coding=utf-8 + +import sys +reload(sys) +sys.setdefaultencoding('utf-8') + +import re +import csv +import requests +import jieba +import jieba.analyse +from bs4 import BeautifulSoup as bs +from tgrocery import Grocery +from train_txt_5 import train_src + + +def artical_content(url): + rsp = requests.get(url) + rsp.encoding = 'utf-8' + html = bs(rsp.text, 'lxml') + # artical = html.select('#sina_keyword_ad_area2') + artical = html.select('.h1_tit') + if len(artical) > 0: + content = artical[0].text + else: + content = html.select('.SG_txta')[0].text + # content = '' + return content.strip() + + +def category(urls): + for url in urls: + artical = artical_content(url) + cate2 = new_grocery.predict(artical) + yield cate2.predicted_y + + +def unzip(seq, L=None): + if L is None: + L = [] + for s in seq: + if not isinstance(s, (list, )): + L.append(s) + else: + L.extend(unzip(s)) + return L + + +if __name__ == "__main__": + + grocery = Grocery('sample') + grocery.train(train_src) + grocery.save() + new_grocery = Grocery('sample') + new_grocery.load() + + L1 = [] + with open('/home/mouse/Downloads/female.csv', 'r') as f1: + f1_csv = csv.reader(f1) + 
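+        # 逐行读取 CSV,把第一列的博客 URL 收集到 L1(假设第一列就是链接)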
for row in f1_csv: + L1.append(row[0]) + # print(len(L1)) + + cate = category(L1) + i = 1 + with open('/home/mouse/infoss.csv', 'w') as f: + f_csv = csv.writer(f) + for row in zip(L1, cate): + f_csv.writerow(unzip(row)) + print 'Writing now, please waiting...{}'.format(str(i)) + i += 1 + print 'Done' diff --git a/celeries/proj/__init__.py b/celeries/proj/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celeries/proj/celery.py b/celeries/proj/celery.py new file mode 100644 index 0000000..acd9617 --- /dev/null +++ b/celeries/proj/celery.py @@ -0,0 +1,10 @@ +# coding=utf-8 +from __future__ import absolute_import +from celery import Celery + +app = Celery('proj', include=['proj.tasks']) +app.config_from_object('proj.celeryconfig') + + +if __name__ == '__main__': + app.start() diff --git a/celeries/proj/celeryconfig.py b/celeries/proj/celeryconfig.py new file mode 100644 index 0000000..864e2f0 --- /dev/null +++ b/celeries/proj/celeryconfig.py @@ -0,0 +1,7 @@ +# coding=utf-8 +BROKER_URL = 'amqp://localhost' # RabbitMQ 作为消息代理 +CELERY_RESULT_BACKEND = 'redis://localhost' # Redis 作为结果存储 +CELERY_TASK_SERIALIZER = 'msgpack' +# 任务序列化和反序列化格式为 msgpack, 别忘了安装 msgpack-python +CELERY_RESULT_SERIALIZER = 'json' # 结果存储序列化格式为 json +CELERY_ACCEPT_CONTENT = ['msgpack', 'json'] # 任务接受格式类型 diff --git a/celeries/proj/tasks.py b/celeries/proj/tasks.py new file mode 100644 index 0000000..16d3ccb --- /dev/null +++ b/celeries/proj/tasks.py @@ -0,0 +1,35 @@ +# coding=utf-8 +from __future__ import absolute_import + +from .celery import app + +from celery.utils.log import get_task_logger + +logger = get_task_logger(__name__) + + +@app.task +def add(x, y): + return x + y + + +@app.task +def mul(x, y): + return x * y + + +@app.task(bind=True) +def div(self, x, y): + logger.info( + ''' + Executing task : {0.id} + task.args : {0.args!r} + task.kwargs : {0.kwargs!r} + '''.format(self.request) + ) + try: + res = x / y + except ZeroDivisionError as e: + raise self.retry(exc=e, countdown=3, max_retries=3) + else: + return res diff --git a/celeries/projb/__init__.py b/celeries/projb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celeries/projb/celery.py b/celeries/projb/celery.py new file mode 100644 index 0000000..fb02e54 --- /dev/null +++ b/celeries/projb/celery.py @@ -0,0 +1,10 @@ +# coding=utf-8 +from __future__ import absolute_import +from celery import Celery + +app = Celery('projb', include=['projb.tasks']) +app.config_from_object('projb.celeryconfig') + + +if __name__ == '__main__': + app.start() diff --git a/celeries/projb/celeryconfig.py b/celeries/projb/celeryconfig.py new file mode 100644 index 0000000..d661cc5 --- /dev/null +++ b/celeries/projb/celeryconfig.py @@ -0,0 +1,28 @@ +# coding=utf-8 +from kombu import Queue + +BROKER_URL = 'amqp://localhost' # RabbitMQ 作为消息代理 +CELERY_RESULT_BACKEND = 'redis://localhost:6379/0' # Redis 作为结果存储 +CELERY_TASK_SERIALIZER = 'msgpack' +# 任务序列化和反序列化格式为 msgpack, 别忘了安装 msgpack-python +CELERY_RESULT_SERIALIZER = 'json' # 结果存储序列化格式为 json +CELERY_ACCEPT_CONTENT = ['msgpack', 'json'] # 任务接受格式类型 + +CELERY_QUEUES = { + Queue('foo', routing_key='task.#'), + Queue('feed_task', routing_key='*.feed'), +} +CELERY_DEFAULT_QUEUE = 'foo' + +CELERY_DEFAULT_EXCHANGE = 'tasks' + +CELERY_DEFAULT_EXCHANGE_TYPE = 'topic' + +CELERY_DEFAULT_ROUTING_KEY = 'task.foooooo' + +CELERY_ROUTES = { + 'projb.tasks.mul': { + 'queue': 'feed_task', + 'routing_key': 'mul.feed', + }, +} diff --git a/celeries/projb/tasks.py b/celeries/projb/tasks.py new file mode 100644 index 
0000000..9b7d2a7 --- /dev/null +++ b/celeries/projb/tasks.py @@ -0,0 +1,14 @@ +# coding=utf-8 +from __future__ import absolute_import + +from .celery import app + + +@app.task +def add(x, y): + return x + y + + +@app.task +def mul(x, y): + return x * y diff --git a/celeries/projc/__init__.py b/celeries/projc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celeries/projc/celery.py b/celeries/projc/celery.py new file mode 100644 index 0000000..125ef95 --- /dev/null +++ b/celeries/projc/celery.py @@ -0,0 +1,10 @@ +# coding=utf-8 +from __future__ import absolute_import +from celery import Celery + +app = Celery('projc', include=['projc.tasks']) +app.config_from_object('projc.celeryconfig') + + +if __name__ == '__main__': + app.start() diff --git a/celeries/projc/celeryconfig.py b/celeries/projc/celeryconfig.py new file mode 100644 index 0000000..1cf48ff --- /dev/null +++ b/celeries/projc/celeryconfig.py @@ -0,0 +1,37 @@ +# coding=utf-8 +from kombu import Queue + +BROKER_URL = 'amqp://localhost' # RabbitMQ 作为消息代理 +CELERY_RESULT_BACKEND = 'redis://localhost:6379/0' # Redis 作为结果存储 +CELERY_TASK_SERIALIZER = 'msgpack' +# 任务序列化和反序列化格式为 msgpack, 别忘了安装 msgpack-python +CELERY_RESULT_SERIALIZER = 'json' # 结果存储序列化格式为 json +CELERY_ACCEPT_CONTENT = ['msgpack', 'json'] # 任务接受格式类型 + +CELERY_QUEUES = { + Queue('foo', routing_key='task.#'), + Queue('feed_task', routing_key='*.feed'), +} +CELERY_DEFAULT_QUEUE = 'foo' + +CELERY_DEFAULT_EXCHANGE = 'tasks' + +CELERY_DEFAULT_EXCHANGE_TYPE = 'topic' + +CELERY_DEFAULT_ROUTING_KEY = 'task.foooooo' + +CELERY_ROUTES = { + 'projb.tasks.mul': { + 'queue': 'feed_task', + 'routing_key': 'mul.feed', + }, + +} + +CELERYBEAT_SCHEDULE = { + 'mul-every-30-seconds': { + 'task': 'projc.tasks.mul', + 'schedule': 30.0, + 'args': (2, 2), + } +} diff --git a/celeries/projc/tasks.py b/celeries/projc/tasks.py new file mode 100644 index 0000000..9b7d2a7 --- /dev/null +++ b/celeries/projc/tasks.py @@ -0,0 +1,14 @@ +# coding=utf-8 +from __future__ import absolute_import + +from .celery import app + + +@app.task +def add(x, y): + return x + y + + +@app.task +def mul(x, y): + return x * y diff --git a/celery_learn.md b/celery_learn.md new file mode 100644 index 0000000..f8c8b84 --- /dev/null +++ b/celery_learn.md @@ -0,0 +1,309 @@ +# Celery 的简单使用 + +标签: python celery + +--- +***代码[在这里](https://github.com/lambdaplus/python/tree/master/celeries)*** + + +Celery 是一个简单、灵活并且可靠的处理大量消息的分发系统。并且是自带电池的,本身提供了维护和操作这个系统的工具。 + +Celery 专注于实时处理的任务队列,并且支持任务调度。 +优点: +1. 简单 +2. 高可用 +3. 快速 +4. 
灵活

## Celery 架构

+ Celery Beat: 任务调度器
+ Celery Worker: 消费者
+ Broker: 消息中间件,常用的是 RabbitMQ 和 Redis
+ Producer:任务生产者
+ Result Backend:用于结果保存。

## Celery 序列化

(序列化格式通过下文 celeryconfig.py 里的 CELERY_TASK_SERIALIZER、CELERY_RESULT_SERIALIZER 和 CELERY_ACCEPT_CONTENT 三项配置。)

## 一个简单的例子
项目目录为
```bash
celeries/proj/
├── celeryconfig.py
├── celery.py
├── __init__.py
└── tasks.py
```
---
主程序 celery.py
```python
from __future__ import absolute_import
from celery import Celery

app = Celery('proj', include=['proj.tasks'])
app.config_from_object('proj.celeryconfig')


if __name__ == '__main__':
    app.start()
```


任务函数 tasks.py
```python
# coding=utf-8
from __future__ import absolute_import

from .celery import app


@app.task
def add(x, y):
    return x + y


@app.task
def mul(x, y):
    return x * y
```
接下来是配置文件 celeryconfig.py
```python
# coding=utf-8
BROKER_URL = 'amqp://localhost'  # RabbitMQ 作为消息代理
CELERY_RESULT_BACKEND = 'redis://localhost'  # Redis 作为结果存储
CELERY_TASK_SERIALIZER = 'msgpack'
# 任务序列化和反序列化格式为 msgpack, 别忘了安装 msgpack-python
CELERY_RESULT_SERIALIZER = 'json'  # 结果存储序列化格式为 json
CELERY_ACCEPT_CONTENT = ['msgpack', 'json']  # 任务接受格式类型
```
因为没有任务调度,所以直接启动消费者就行了。在启动之前,要先去安装 RabbitMQ 和 Redis,并启动。

现在启动我们的消费者(worker),命令行直接启动:

    > cd celeries
    > celery -A celeries worker -l info

看到下面的提示信息,表示成功启动
```python
 -------------- celery@mouse-pc v4.0.2 (latentcall)
---- **** -----
--- * *** * -- Linux-4.9.15-1-MANJARO-x86_64-with-glibc2.2.5 2017-03-22 21:53:05
-- * - **** ---
- ** ---------- [config]
- ** ---------- .> app:         celeries:0x7f9737da7a58
- ** ---------- .> transport:   amqp://guest:**@localhost:5672//
- ** ---------- .> results:     redis://localhost/
- *** --- * --- .> concurrency: 2 (prefork)
-- ******* ---- .> task events: OFF (enable -E to monitor tasks in this worker)
--- ***** -----
 -------------- [queues]
                .> celery           exchange=celery(direct) key=celery


[tasks]
  . celeries.tasks.add
  . celeries.tasks.mul
  . celeries.tasks.xsum

[2017-03-22 21:53:06,011: INFO/MainProcess] Connected to amqp://guest:**@127.0.0.1:5672//
[2017-03-22 21:53:06,034: INFO/MainProcess] mingle: searching for neighbors
[2017-03-22 21:53:07,088: INFO/MainProcess] mingle: all alone
[2017-03-22 21:53:07,115: INFO/MainProcess] celery@mouse-pc ready.
```
打开 IPython 测试一下我们的几个函数。
```python
~ ▶︎︎ ipython
Python 3.6.0 |Anaconda 4.3.1 (64-bit)| (default, Dec 23 2016, 12:22:00)
Type "copyright", "credits" or "license" for more information.


In [1]: from celeries.tasks import add, mul, xsum

In [2]: add.delay(1, 9)
Out[2]: <AsyncResult: 38022eec-2d3d-4ee0-8c7e-367ef92b5f1f>

In [3]: r = mul.delay(2, 4)

In [4]: r.status
Out[4]: 'SUCCESS'

In [5]: r.result
Out[5]: 8

In [6]: r.successful   # 注意:没加括号,拿到的是方法本身而不是调用结果
Out[6]: <bound method AsyncResult.successful of <AsyncResult: 17af4e48-736d-44c9-a8be-a50a35bbc435>>

In [7]: r.backend
Out[7]: <celery.backends.redis.RedisBackend at 0x...>  # 结果存储在 redis 里

```
delay() 是 apply_async() 的快捷方式。你也可以直接调用 apply_async():
```python
In [24]: r = mul.apply_async((2, 4))

In [25]: r.result
Out[25]: 8
```
delay() 和 apply_async() 返回的都是 AsyncResult 实例,可用于查看任务的执行状态,但首先你要配置好 result backend.
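AsyncResult 还可以用来做简单的轮询等待。下面是个演示性的小片段(假设 worker 和 result backend 已按上文启动、能 import 到 proj.tasks;这不是项目里的代码):

```python
# coding=utf-8
import time

from proj.tasks import add

r = add.apply_async((1, 9), countdown=2)  # countdown=2: 延迟 2 秒再执行
while not r.ready():       # ready(): 任务是否已执行完毕(成功或失败)
    time.sleep(0.5)
print(r.status, r.result)  # 成功时输出: SUCCESS 10
```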
+此时,在worker终端上可以看到,任务信息和结果 +```bash +[2017-03-22 22:05:13,689: INFO/MainProcess] Received task: celeries.tasks.add[38022eec-2d3d-4ee0-8c7e-367ef92b5f1f] +[2017-03-22 22:05:14,765: INFO/PoolWorker-2] Task celeries.tasks.add[38022eec-2d3d-4ee0-8c7e-367ef92b5f1f] succeeded in 0.007736653999018017s: 10 +[2017-03-22 22:08:36,378: INFO/MainProcess] Received task: celeries.tasks.mul[17af4e48-736d-44c9-a8be-a50a35bbc435] +[2017-03-22 22:08:37,010: INFO/PoolWorker-2] Task celeries.tasks.mul[17af4e48-736d-44c9-a8be-a50a35bbc435] succeeded in 0.011531784999533556s: 8 +``` +仔细看,每个任务都有一个 task_id。我们可以通过 task_id 获得任务的结果。 + +取 add 任务的 id: +```bash +task_id = '38022eec-2d3d-4ee0-8c7e-367ef92b5f1f' +In [8]: task_id = '38022eec-2d3d-4ee0-8c7e-367ef92b5f1f' + +In [9]: add.AsyncResult(task_id).get() +Out[9]: 10 +``` +关联任务 + + In [2]: m = mul.apply_async((2, 2), link=mul.s(3)) + +在 Worker 终端里会看到两个值,关联之前和之后的。 +``` +[2017-03-23 13:27:13,045: INFO/MainProcess] Received task: proj.tasks.mul[40492357-44bb-41e4-979f-6eb197107a5b] +[2017-03-23 13:27:13,731: INFO/PoolWorker-2] Task proj.tasks.mul[40492357-44bb-41e4-979f-6eb197107a5b] succeeded in 0.0023383530005958164s: 4 +[2017-03-23 13:27:13,732: INFO/MainProcess] Received task: proj.tasks.mul[b01be1b8-f957-48b2-9d72-8187af6ac161] +[2017-03-23 13:27:13,734: INFO/PoolWorker-2] Task proj.tasks.mul[b01be1b8-f957-48b2-9d72-8187af6ac161] succeeded in 0.0006868359996587969s: 12 +``` + + +## 指定队列 +在 celeries 目录下新建一个目录 projb, 代码使用 proj 中的。 +```bash +celeries/projb +├── celeryconfig.py +├── celery.py +├── __init__.py +└── tasks.py +``` +在 celeryconfig.py 添加些配置: +``` +# coding=utf-8 +from kombu import Queue + +BROKER_URL = 'amqp://localhost' # RabbitMQ 作为消息代理 +CELERY_RESULT_BACKEND = 'redis://localhost:6379/0' # Redis 作为结果存储 +CELERY_TASK_SERIALIZER = 'msgpack' +# 任务序列化和反序列化格式为 msgpack, 别忘了安装 msgpack-python +CELERY_RESULT_SERIALIZER = 'json' # 结果存储序列化格式为 json +CELERY_ACCEPT_CONTENT = ['msgpack', 'json'] # 任务接受格式类型 + +CELERY_QUEUES = { + Queue('foo', routing_key='task.#'), # 路由键以 task. 开头的消息进入此队列 + Queue('feed_task', routing_key='*.feed'), # 路由键以 .feed 结尾的消息进入此队列 +} +CELERY_DEFAULT_QUEUE = 'foo' # 默认队列 + +CELERY_DEFAULT_EXCHANGE = 'tasks' # 默认交换机 + +CELERY_DEFAULT_EXCHANGE_TYPE = 'topic' # 默认交换机类型 topic + +CELERY_DEFAULT_ROUTING_KEY = 'task.foooooooo' # 默认交换机路由键, task. 
后的值不影响 + +CELERY_ROUTES = { + 'projb.tasks.mul': { + 'queue': 'feed_task', # 消息全都进入 feed_task 队列 + 'routing_key': 'mul.feed', + }, +} +``` +然后,我们以指定队列的方式启动: + + > celery -A projb worker -Q foo,feed_task -l info + +tasks.py 中的 mul 函数只会通过队列 feed_task 被执行。add 函数通过默认队列 foo 执行。 + ```python +In [84]: from projb.tasks import mul, add + +In [85]: r = add.delay(3, 3) + +In [86]: r.result +Out[86]: 6 + +In [87]: res = mul.delay(3, 3) + +In [88]: res.result +Out[88]: 9 +``` +不过,我们可以使用 apply_async() 函数来指定队列。 +```python +In [90]: r = add.apply_async((3, 3), queue='feed_task', routing_key='mul.feed') + +In [91]: r.result +Out[91]: 6 + +In [92]: res = mul.apply_async((3, 3), queue='foo', routing_key='task.foooooo') + +In [93]: res.result +Out[93]: 9 +``` + +## 任务调度 +依法炮制,基于 projb 的代码,创建目录 projc,在 proc/celeryconfig.py 中添加如下配置。 +``` +CELERYBEAT_SCHEDULE = { + 'mul-every-30-seconds': { + 'task': 'projc.tasks.mul', + 'schedule': 30.0, + 'args': (2, 2), + } +} +``` +执行 + + > celery -B -A projc worker -l info + +就可以在终端看到每 30s 执行一次任务。 +``` +[2017-03-23 12:23:13,920: INFO/Beat] Scheduler: Sending due task mul-every-30-seconds (projc.tasks.mul) +[2017-03-23 12:23:13,923: INFO/MainProcess] Received task: projc.tasks.mul[9c414257-d627-4c36-a9d8-9daed7e295c0] +[2017-03-23 12:23:15,177: INFO/PoolWorker-3] Task projc.tasks.mul[9c414257-d627-4c36-a9d8-9daed7e295c0] succeeded in 0.0010301589991286164s: 4 +``` + +## 任务绑定、日志记录和错误重试 + +任务绑定、记录日志和重试是 Celery 3 个常用的高级功能。接下来,修改 proj 的 tasks.py 文件。添加一个 div 函数。 +``` +@app.task(bind=True) +def div(self, x, y): + logger.info( + ''' + Executing task : {0.id} + task.args : {0.args!r} + task.kwargs : {0.kwargs!r} + '''.format(self.request) + ) + try: + res = x / y + except ZeroDivisionError as e: + raise self.retry(exc=e, countdown=3, max_retries=3) + else: + return res +``` +在 Ipython 调用: + + In [3]: d = div.delay(2, 1) + +在 worker 中可以看到 +``` +[2017-03-23 14:57:17,361: INFO/PoolWorker-2] proj.tasks.div[68ef1584-16ac-4236-9858-b00842891bbc]: + Executing task : 68ef1584-16ac-4236-9858-b00842891bbc + task.args : [2, 1] + task.kwargs : {} + +[2017-03-23 14:57:17,369: INFO/PoolWorker-2] Task proj.tasks.div[68ef1584-16ac-4236-9858-b00842891bbc] succeeded in 0.007741746998362942s: 2.0 +``` +换成可以引起异常的参数: + + In [4]: d = div.delay(2, 0) + +可以看到,在 worker 中每 3s 重试一次,总共重复三次(执行了 4 次),然后抛出异常! 
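顺带一提,如果随后用 get() 取结果,重试耗尽后的异常默认会原样抛给调用方(propagate 默认为 True)。示意如下(沿用上面的 d):

```python
In [5]: d.get(timeout=20)  # 重试全部失败后,这里会抛出 ZeroDivisionError
```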
\ No newline at end of file
diff --git a/decorate/decorater_of_class.py b/decorate/decorater_of_class.py
new file mode 100644
index 0000000..3ab6999
--- /dev/null
+++ b/decorate/decorater_of_class.py
@@ -0,0 +1,19 @@
+# coding=utf-8
+class Log():
+
+    def __init__(self, file="info.log"):
+        self.file = file
+
+    def __call__(self, func):
+        def wrapper(*args, **kw):
+            log = func.__name__ + " was called"
+            print(log)
+            with open(self.file, 'a') as f:
+                f.write(log + '\n')
+            return func(*args, **kw)
+        return wrapper  # 返回 wrapper,被装饰的 hello 才仍然可调用,且每次调用都记日志
+
+
+@Log()
+def hello():
+    print('Hello World!')
diff --git a/decorate/decorater_with_para.py b/decorate/decorater_with_para.py
new file mode 100644
index 0000000..59878d1
--- /dev/null
+++ b/decorate/decorater_with_para.py
@@ -0,0 +1,30 @@
+# coding=utf-8
+
+from functools import wraps
+
+
+def logs(file="info.log"):
+    def decorate(func):
+        @wraps(func)
+        def wrapper(*args, **kw):
+            log = func.__name__ + " was called"
+            print(log)
+            with open(file, 'a') as f:
+                f.write(log + '\n')
+            return func(*args, **kw)  # 记完日志别忘了执行被装饰的函数本身
+        return wrapper
+    return decorate
+
+
+@logs()
+def hello():
+    print('Hello World!')
+
+hello()
+
+
+@logs(file='info2.log')
+def hello2():
+    print('Hello World!')
+
+hello2()
diff --git a/decorate/decorater_without_para.py b/decorate/decorater_without_para.py
new file mode 100644
index 0000000..f8c286a
--- /dev/null
+++ b/decorate/decorater_without_para.py
@@ -0,0 +1,19 @@
+# coding=utf-8
+
+from functools import wraps
+
+
+def log(func):
+    @wraps(func)
+    def wrapper(*args, **kw):
+        print("I'm a log ^*^")
+        result = func(*args, **kw)
+        return result
+    return wrapper
+
+
+@log
+def hello():
+    print('Hello everybody')
+
+hello()
diff --git a/host_trans_anti.py b/host_trans_anti.py
new file mode 100644
index 0000000..9e937f2
--- /dev/null
+++ b/host_trans_anti.py
@@ -0,0 +1,30 @@
+# coding: utf-8
+
+# 把 anti-AD 的 dnsmasq 规则转换成 MerlinClash 的 hosts 格式
+
+import wget
+from datetime import date
+today = date.today()
+
+def host_trans(url):
+    file = wget.download(url, out='/home/lambda/Documents/adblock/anti_ad_'+str(today)+'.conf')
+    with open(file, 'r') as f:
+        new_f = open('/home/lambda/Documents/adblock/anti_ad_'+str(today)+'.yaml', 'w')
+        new_f.write('hosts:\n')
+        new_f.write(' router.asus.com: 192.168.50.1\n')
+        new_f.write(' services.googleapis.cn: 74.125.193.94\n')
+        for lines in f:
+            if '#' not in lines and not lines == "\n":
+                # 获取网址
+                tail = lines[9:].strip()
+                # tail[:-1] 去掉字符串尾部的 /
+                new_lines = " " + tail[:-1] + ": " + '127.0.0.1'
+                new_f.write(new_lines+'\n')
+            else:
+                pass
+        new_f.close()
+    print("anti-ad.yaml文件保存在文档/adblock文件夹下")
+
+if __name__ == '__main__':
+    url = 'https://anti-ad.net/anti-ad-for-dnsmasq.conf'
+    host_trans(url)
\ No newline at end of file
diff --git "a/hosts\350\275\254\346\215\242\345\244\247\345\234\243\345\207\200\345\214\226.py" "b/hosts\350\275\254\346\215\242\345\244\247\345\234\243\345\207\200\345\214\226.py"
new file mode 100644
index 0000000..6a69229
--- /dev/null
+++ "b/hosts\350\275\254\346\215\242\345\244\247\345\234\243\345\207\200\345\214\226.py"
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[32]:
+
+
+with open('/home/lambda/Downloads/host.html', 'r') as f:
+    new_f = open('/home/lambda/Downloads/ad.yaml', 'w')
+    new_f.write('hosts:\n')
+    new_f.write(' router.asus.com: 192.168.50.1\n')
+    new_f.write(' services.googleapis.cn: 74.125.193.94\n')
+    for lines in f:
+        if '#' not in lines and not lines == "\n":
+            tail = lines[9:].strip()
+            head = lines[:9].strip()
+            new_lines = " " + tail + ": " + head
+            new_f.write(new_lines+'\n')
+        else:
+            pass
+    new_f.close()
+
+
+# In[ ]:
+
+
+
+
diff --git 
"a/hosts\350\275\254\346\215\242\345\244\247\345\234\243\345\207\200\345\214\226\345\222\214anti-AD.ipynb" "b/hosts\350\275\254\346\215\242\345\244\247\345\234\243\345\207\200\345\214\226\345\222\214anti-AD.ipynb" new file mode 100644 index 0000000..c0b98b9 --- /dev/null +++ "b/hosts\350\275\254\346\215\242\345\244\247\345\234\243\345\207\200\345\214\226\345\222\214anti-AD.ipynb" @@ -0,0 +1,125 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 大圣净化的转换代码" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "with open('/home/lambda/Downloads/host.html', 'r') as f:\n", + " new_f = open('/home/lambda/Downloads/ad.yaml', 'w')\n", + " new_f.write('hosts:\\n')\n", + " new_f.write(' router.asus.com: 192.168.50.1\\n')\n", + " new_f.write(' services.googleapis.cn: 74.125.193.94\\')\n", + " for lines in f:\n", + " if '#' not in lines and not lines == \"\\n\":\n", + " tail = lines[9:].strip()\n", + " head = lines[:9].strip()\n", + " new_lines = \" \" + tail + \": \" + head\n", + " new_f.write(new_lines+'\\n')\n", + " else:\n", + " pass\n", + " new_f.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## anti-AD的转换代码" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('/home/lambda/Downloads/adblock-for-dnsmasq.con.html', 'r') as f:\n", + " new_f = open('/home/lambda/Downloads/anti_ad.yaml', 'w')\n", + " new_f.write('hosts:\\n')\n", + " new_f.write(' router.asus.com: 192.168.50.1\\n')\n", + " new_f.write(' services.googleapis.cn: 74.125.193.94\\n')\n", + " for lines in f:\n", + " if '#' not in lines and not lines == \"\\n\":\n", + " # 获取网址\n", + " tail = lines[9:].strip()\n", + " # tail[:-1] 去掉字符串尾部的 /\n", + " new_lines = \" \" + tail[:-1] + \": \" + '127.0.0.1'\n", + " new_f.write(new_lines+'\\n')\n", + " else:\n", + " pass\n", + " new_f.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 合体" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0mf2\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mf3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mf2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mf1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mf3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m 
\u001b[0mf1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf3\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.8/codecs.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, input, final)\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;31m# decode input (taking the buffer into account)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconsumed\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 323\u001b[0m \u001b[0;31m# keep undecoded input until the next call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconsumed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "with open('/home/lambda/Downloads/anti_dasheng.yaml', 'w') as f1, open('/home/lambda/Downloads/ad.yaml', 'r') as f2, open('/home/lambda/Downloads/anti_ad.yaml', 'r') as f3:\n", + " while f2 or f3:\n", + " if f2:\n", + " f1.write(f2.readline())\n", + " if f3:\n", + " f1.write(f3.readline())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/husheng_action.py b/husheng_action.py new file mode 100644 index 0000000..f57e5d1 --- /dev/null +++ b/husheng_action.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sun Apr 18 17:34:03 2021 + +@author: lambda +""" +import aiohttp +import asyncio +import re +import os +import sys +import json +import random +import pandas as pd +import datetime +import time +import requests as request +from lxml import etree + +user_agent = [ + "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50", + "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50", + "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0", + "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; 
.NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)", + "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)", + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1", + "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1", + "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11", + "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)", + "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", + "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", + "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10", + "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13", + "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+", + "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0", + "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)", + "UCWEB7.0.2.37/28/999", + "NOKIA5700/ UCWEB7.0.2.37/28/999", + "Openwave/ UCWEB7.0.2.37/28/999", + "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999", + # iPhone 6: + "Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25", +] + +start = time.time() + +host = 'http://data.eastmoney.com/hsgtcg/list.html?DateType=DateType=%27jd%27' +res = request.get(host) +xml = etree.HTML(res.text) +result = xml.xpath('/html/body/div[1]/div[8]/div[2]/div[2]/div[1]/div[1]/div/span/text()')[0] +today = result[1:11] 
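+# result 形如 '(2021-04-16 ……)',切片 [1:11] 跳过左括号取 10 个字符得到日期串(按该页面当时的格式推断)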
+print(f'今天获取的数据是: {today}') + +fname = str(today)+".xlsx" +fname1 = "PPOS_POTE_"+fname +fname2 = "PPOS_POTE_SZ_"+fname + +#file_path = './hushengang' +#if not os.path.exists(file_path): +# os.mkdir(file_path) + +#if os.path.exists(fname): +# print('文件已存在,明天再来吧') +# sys.exit() + +heads = {'HdDate', 'SCode', 'SName', 'NewPrice', 'ShareSZ_Chg_One', 'ShareSZ_Chg_Rate_One', 'LTZB_One', 'ZZB_One'} +rows = [] + +# 获取网页信息 +async def fetch(session, url): + headers = {'User-Agent': random.choice(user_agent)} + async with session.get(url, headers=headers) as response: + return await response.text(encoding='utf-8') + +# 解析网页 +async def parser(html): + pat = re.compile('data:(.*)}', re.S) # 使用正则 + result = re.search(pat, html).group(1) + data = json.loads(result) + if len(data) == 0: + print('日期有错误,看看是不是日期不对。。。。。。') + sys.exit() + for d in data: + row = {key: value for key, value in d.items() if key in heads} + rows.append(row) + +# 下载网页 +async def download(url): + async with aiohttp.ClientSession() as session: + html = await fetch(session, url) + await parser(html) + +#urls = [f'http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?callback=jQuery112305322211230994847_1618827285261&st=ShareSZ_Chg_One&sr=-1&ps=50&p='+str(p)+'&type=HSGT20_GGTJ_SUM&token=894050c76af8597a853f5b408b759f5d&js=%7B%22data%22%3A(x)%2C%22pages%22%3A(tp)%2C%22font%22%3A(font)%7D&filter=(DateType%3D%27jd%27)(HdDate%3D%27'+str(today)+'%27)' for p in range(1, 31)] +urls = [f'http://dcfm.eastmoney.com/EM_MutiSvcExpandInterface/api/js/get?type=HSGT20_GGTJ_SUM&token=894050c76af8597a853f5b408b759f5d&st=ShareSZ_Chg_One&sr=-1&p='+str(p)+'&ps=50&js=var%20mXyeKPjW={pages:(tp),data:(x)}&filter=(DateType=%27jd%27%20and%20HdDate=%27'+str(today)+'%27)&rt=53931781' for p in range(1, 31)] + +# 利用asyncio模块进行异步IO处理 +async def main(): + await asyncio.gather(*[download(url) for url in urls]) + +asyncio.run(main()) +# 将rows转化为pandas中的DataFrame +df = pd.DataFrame(rows) +df.columns = ['日期', '代码', '名称', '最新股价' , '市值', '市值增幅', '占流通股比', '占总股比'] +# 从大到小排序 +df.sort_values(by='市值', ascending=False) +try: + df.to_excel(fname) # 保存成Excel文件 +except Exception as e: + print("请关闭文件后再试", e) + +df1 = df.nlargest(20, '占总股比') +df2 = df.nlargest(20, "占流通股比") +df3 = df.nlargest(10, "市值") +# 占总股比前20和流通股比前20的交集 +df1_df2 = pd.merge(df1, df2, on=list(df.columns), how='inner') +# 三者的交集 +df1_df2_df3 = pd.merge(df1_df2, df3, on=list(df.columns), how='inner') +try: + df1_df2.to_excel(fname1) +except Exception as e: + print("请关闭文件后再试", e) + +try: + df1_df2_df3.to_excel(fname2) +except Exception as e: + print("请关闭文件后再试", e) + +stop = time.time() +print(f"使用aiohttp共耗时{stop-start} S") diff --git a/hushengangtong.py b/hushengangtong.py new file mode 100644 index 0000000..9b841a5 --- /dev/null +++ b/hushengangtong.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sun Apr 18 17:34:03 2021 + +@author: lambda +""" +import aiohttp +import asyncio +import re +import os +import sys +import json +import random +import pandas as pd +import datetime +import time +import requests as request +from lxml import etree + +user_agent = [ + "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50", + "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50", + "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0", + "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET 
CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)", + "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)", + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1", + "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1", + "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11", + "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)", + "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", + "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", + "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10", + "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13", + "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+", + "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0", + "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)", + "UCWEB7.0.2.37/28/999", + "NOKIA5700/ UCWEB7.0.2.37/28/999", + "Openwave/ UCWEB7.0.2.37/28/999", + "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999", + # iPhone 6: + "Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25", +] + +start = time.time() + +host = 'http://data.eastmoney.com/hsgtcg/list.html?DateType=DateType=%27jd%27' +res = request.get(host) +xml = etree.HTML(res.text) +result = xml.xpath('/html/body/div[1]/div[8]/div[2]/div[2]/div[1]/div[1]/div/span/text()')[0] +today = result[1:11] +print(f'今天获取的数据是: {today}') + +fname = 
str(today)+".xlsx"
+fname1 = "PPOS_POTE_"+fname
+fname2 = "PPOS_POTE_SZ_"+fname
+
+file_path = './'
+#if not os.path.exists(file_path):
+#    os.mkdir(file_path)
+#os.chdir(file_path)
+
+if os.path.exists(fname):
+    print('文件已存在,明天再来吧')
+    sys.exit()
+
+heads = {'HdDate', 'SCode', 'SName', 'NewPrice', 'ShareSZ_Chg_One', 'ShareSZ_Chg_Rate_One', 'LTZB_One', 'ZZB_One'}
+rows = []
+
+# Fetch one page, sending a randomly chosen User-Agent
+async def fetch(session, url):
+    headers = {'User-Agent': random.choice(user_agent)}
+    async with session.get(url, headers=headers) as response:
+        return await response.text(encoding='utf-8')
+
+# Parse the JSONP-style response with a regex and keep only the whitelisted fields
+async def parser(html):
+    pat = re.compile('data:(.*)}', re.S)
+    result = re.search(pat, html).group(1)
+    data = json.loads(result)
+    if len(data) == 0:
+        print('日期有错误,看看是不是日期不对。。。。。。')
+        sys.exit()
+    for d in data:
+        row = {key: value for key, value in d.items() if key in heads}
+        rows.append(row)
+
+# Download one page and hand it to the parser
+async def download(url):
+    async with aiohttp.ClientSession() as session:
+        html = await fetch(session, url)
+        await parser(html)
+
+#urls = [f'http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?callback=jQuery112305322211230994847_1618827285261&st=ShareSZ_Chg_One&sr=-1&ps=50&p='+str(p)+'&type=HSGT20_GGTJ_SUM&token=894050c76af8597a853f5b408b759f5d&js=%7B%22data%22%3A(x)%2C%22pages%22%3A(tp)%2C%22font%22%3A(font)%7D&filter=(DateType%3D%27jd%27)(HdDate%3D%27'+str(today)+'%27)' for p in range(1, 31)]
+urls = ['http://dcfm.eastmoney.com/EM_MutiSvcExpandInterface/api/js/get?type=HSGT20_GGTJ_SUM&token=894050c76af8597a853f5b408b759f5d&st=ShareSZ_Chg_One&sr=-1&p='+str(p)+'&ps=50&js=var%20mXyeKPjW={pages:(tp),data:(x)}&filter=(DateType=%27jd%27%20and%20HdDate=%27'+str(today)+'%27)&rt=53931781' for p in range(1, 31)]
+
+# Run all downloads concurrently on the asyncio event loop
+async def main():
+    await asyncio.gather(*[download(url) for url in urls])
+
+asyncio.run(main())
+# Turn the collected rows into a pandas DataFrame
+df = pd.DataFrame(rows)
+df.columns = ['日期', '代码', '名称', '最新股价', '市值', '市值增幅', '占流通股比', '占总股比']
+# Sort descending by market value; sort_values returns a new DataFrame, so reassign
+df = df.sort_values(by='市值', ascending=False)
+try:
+    df.to_excel(fname)  # save as an Excel file
+except Exception as e:
+    print("请关闭文件后再试", e)
+
+df1 = df.nlargest(20, '占总股比')
+df2 = df.nlargest(20, "占流通股比")
+df3 = df.nlargest(10, "市值")
+# Intersection of the top 20 by total-share ratio and the top 20 by float-share ratio
+df1_df2 = pd.merge(df1, df2, on=list(df.columns), how='inner')
+# Intersection of all three
+df1_df2_df3 = pd.merge(df1_df2, df3, on=list(df.columns), how='inner')
+try:
+    df1_df2.to_excel(fname1)
+except Exception as e:
+    print("请关闭文件后再试", e)
+
+try:
+    df1_df2_df3.to_excel(fname2)
+except Exception as e:
+    print("请关闭文件后再试", e)
+
+stop = time.time()
+print(f"使用aiohttp共耗时{stop-start} S")
diff --git a/rabbitmq/emit_logs.py b/rabbitmq/emit_logs.py
new file mode 100644
index 0000000..d0181bd
--- /dev/null
+++ b/rabbitmq/emit_logs.py
@@ -0,0 +1,17 @@
+# coding: utf-8
+import pika
+import sys
+
+connection = pika.BlockingConnection(
+    pika.ConnectionParameters(host='localhost'))
+channel = connection.channel()
+
+channel.exchange_declare(exchange='logs', type='fanout')  # older pika API; recent pika renamed this parameter to exchange_type
+messages = ' '.join(sys.argv[1:]) or 'info: Hello World!'
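+
+# With a fanout exchange the routing_key below is ignored: every queue
+# currently bound to 'logs' gets its own copy of the message.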
+channel.basic_publish(exchange='logs',
+                      routing_key='',
+                      body=messages)
+
+print("[x] Sent {}".format(messages))
+connection.close()
diff --git a/rabbitmq/emit_logs_direct.py b/rabbitmq/emit_logs_direct.py
new file mode 100644
index 0000000..ad65ed5
--- /dev/null
+++ b/rabbitmq/emit_logs_direct.py
@@ -0,0 +1,19 @@
+# coding=utf-8
+import pika
+import sys
+
+connection = pika.BlockingConnection(
+    pika.ConnectionParameters(host='localhost'))
+channel = connection.channel()
+
+channel.exchange_declare(exchange='direct_logs',
+                         type='direct')
+
+severity = sys.argv[1] if len(sys.argv) > 1 else 'info'
+messages = ' '.join(sys.argv[2:]) or "Hello World!"
+
+channel.basic_publish(exchange="direct_logs",
+                      routing_key=severity,
+                      body=messages)
+print('[x] Sent {}:{}'.format(severity, messages))
+connection.close()
diff --git a/rabbitmq/emit_logs_topic.py b/rabbitmq/emit_logs_topic.py
new file mode 100644
index 0000000..9cf1101
--- /dev/null
+++ b/rabbitmq/emit_logs_topic.py
@@ -0,0 +1,20 @@
+# coding=utf-8
+import pika
+import sys
+
+connection = pika.BlockingConnection(
+    pika.ConnectionParameters(host='localhost'))
+channel = connection.channel()
+
+channel.exchange_declare(exchange='topic_logs',
+                         type='topic')
+
+routing_key = sys.argv[1] if len(sys.argv) > 1 else 'anonymous.info'
+messages = ' '.join(sys.argv[2:]) or "Hello World!"
+
+channel.basic_publish(exchange='topic_logs',
+                      routing_key=routing_key,
+                      body=messages)
+
+print("[x] Sent {}:{}".format(routing_key, messages))
+connection.close()
diff --git a/rabbitmq/kombu_emit_logs_topic.py b/rabbitmq/kombu_emit_logs_topic.py
new file mode 100644
index 0000000..ef21777
--- /dev/null
+++ b/rabbitmq/kombu_emit_logs_topic.py
@@ -0,0 +1,17 @@
+# coding=utf-8
+import sys
+
+from kombu import Connection, Producer, Queue, Exchange
+
+logs_exchange = Exchange('logs', 'topic', durable=True)
+
+URL = 'amqp://localhost'
+
+routing_key = sys.argv[1] if len(sys.argv) > 1 else 'anonymous.info'
+messages = ' '.join(sys.argv[2:]) or "Hello World!"
+
+with Connection(URL) as conn:
+    producer = Producer(conn)
+    producer.publish(messages, exchange=logs_exchange,
+                     routing_key=routing_key,
+                     serializer='json')
diff --git a/rabbitmq/kombu_receive_logs_topic.py b/rabbitmq/kombu_receive_logs_topic.py
new file mode 100644
index 0000000..a276bea
--- /dev/null
+++ b/rabbitmq/kombu_receive_logs_topic.py
@@ -0,0 +1,41 @@
+# coding=utf-8
+import sys
+
+from kombu import Exchange, Queue, Connection, Consumer
+from kombu.async import Hub  # renamed to kombu.asynchronous in newer kombu releases
+
+
+logs_exchange = Exchange(name='logs', type="topic", durable=True)
+
+URL = 'amqp://localhost'
+hub = Hub()
+
+binding_keys = sys.argv[1:]
+if not binding_keys:
+    sys.stderr.write("Usage: {} [binding_keys]...\n".format(sys.argv[0]))
+    sys.exit()
+
+tasks_queues = [Queue(binding_key,
+                      logs_exchange,
+                      exclusive=True,
+                      routing_key=binding_key)
+                for binding_key in binding_keys]
+
+print("[*] Waiting for logs. 
To exit press Ctrl+C")
+
+
+def on_messages(body, messages):
+    print("""
+    Body: {0}
+    Properties: {1}
+    DeliveryInfo: {2}
+    """.format(body, messages.properties, messages.delivery_info)
+    )
+
+with Connection(URL) as conn:
+    conn.register_with_event_loop(hub)
+    with Consumer(conn, tasks_queues, callbacks=[on_messages]):
+        try:
+            hub.run_forever()
+        except KeyboardInterrupt:
+            exit()
diff --git a/rabbitmq/kombu_receive_logs_topic_2.py b/rabbitmq/kombu_receive_logs_topic_2.py
new file mode 100644
index 0000000..e033266
--- /dev/null
+++ b/rabbitmq/kombu_receive_logs_topic_2.py
@@ -0,0 +1,32 @@
+# coding=utf-8
+import sys
+
+from kombu import Exchange, Queue, Connection
+from kombu.mixins import ConsumerMixin
+
+
+class Worker(ConsumerMixin):
+    logs_exchange = Exchange(name='logs', type="topic", durable=True)
+
+    def __init__(self, connection):
+        self.connection = connection
+
+        # must be stored on self -- get_consumers() reads self.binding_keys
+        self.binding_keys = sys.argv[1:]
+        if not self.binding_keys:
+            sys.stderr.write('Usage: {} [binding_keys] ...\n'.format(sys.argv[0]))
+            sys.exit(1)
+
+    def get_consumers(self, Consumer, channel):
+        return [Consumer([Queue(binding_key,
+                                self.logs_exchange,
+                                exclusive=True,
+                                routing_key=binding_key)
+                          for binding_key in self.binding_keys],
+                         callbacks=[self.on_messages])]
+
+    def on_messages(self, body, messages):
+        print('Body: {}'.format(body))
+
+
+URL = 'amqp://localhost'
+with Connection(URL) as connection:
+    Worker(connection).run()
diff --git a/rabbitmq/new_task.py b/rabbitmq/new_task.py
new file mode 100644
index 0000000..d738cc9
--- /dev/null
+++ b/rabbitmq/new_task.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+import pika
+import sys
+
+connection = pika.BlockingConnection(
+    pika.ConnectionParameters(host='localhost'))
+channel = connection.channel()
+channel.queue_declare(queue='task_queue', durable=True)
+# durable=True lets the queue survive a RabbitMQ restart; delivery_mode=2
+# below additionally persists the messages themselves
+messages = ' '.join(sys.argv[1:]) or "Hello World!"
+channel.basic_publish(exchange='',
+                      routing_key='task_queue',
+                      body=messages,
+                      properties=pika.BasicProperties(
+                          delivery_mode=2))
+print('[x] Sent {}'.format(messages))
+connection.close()
diff --git a/rabbitmq/receive.py b/rabbitmq/receive.py
new file mode 100644
index 0000000..d512421
--- /dev/null
+++ b/rabbitmq/receive.py
@@ -0,0 +1,22 @@
+# coding=utf-8
+import pika
+
+connection = pika.BlockingConnection(pika.ConnectionParameters(
+    host="localhost"))
+channel = connection.channel()
+channel.queue_declare(queue='hello')
+
+
+def callback(ch, method, properties, body):
+    print("[x] Received {}".format(body))
+
+channel.basic_consume(callback,
+                      queue='hello',
+                      no_ack=True)  # explicitly opt out of message acknowledgements
+
+print('[*] Waiting for messages. To exit press Ctrl+C')
+
+try:
+    channel.start_consuming()
+except KeyboardInterrupt:
+    channel.stop_consuming()
diff --git a/rabbitmq/receive_logs.py b/rabbitmq/receive_logs.py
new file mode 100644
index 0000000..de1bf52
--- /dev/null
+++ b/rabbitmq/receive_logs.py
@@ -0,0 +1,30 @@
+# coding: utf-8
+import pika
+
+connection = pika.BlockingConnection(
+    pika.ConnectionParameters(host='localhost'))
+channel = connection.channel()
+
+channel.exchange_declare(exchange='logs',
+                         type='fanout')
+
+result = channel.queue_declare(exclusive=True)
+# exclusive=True: the queue is deleted once this consumer disconnects
+
+queue_name = result.method.queue
+
+channel.queue_bind(exchange='logs',
+                   queue=queue_name)
+
+print("[*] Waiting for logs. 
To exit press Ctrl+C") + + +def callback(ch, method, properties, body): + print("[x] {}".format(body)) + +channel.basic_consume(callback, queue=queue_name, no_ack=True) + +try: + channel.start_consuming() +except KeyboardInterrupt: + channel.stop_consuming() diff --git a/rabbitmq/receive_logs_direct.py b/rabbitmq/receive_logs_direct.py new file mode 100644 index 0000000..3b89a15 --- /dev/null +++ b/rabbitmq/receive_logs_direct.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +import pika +import sys + +connection = pika.BlockingConnection( + pika.ConnectionParameters(host='localhost')) +channel = connection.channel() + +channel.exchange_declare(exchange='direct_logs', + type='direct') + +result = channel.queue_declare(exclusive=True) +queue_name = result.method.queue + +severities = sys.argv[1:] +if not severities: + sys.stderr.write( + "Usage: {} [info] [warning] [error]\n".format(sys.argv[0])) + sys.exit(1) + +for severity in severities: + channel.queue_bind(exchange='direct_logs', + queue=queue_name, + routing_key=severity) + +print("[*] Waitting for logs. To exit press Ctrl+C") + + +def callback(ch, method, properties, body): + print("[x] {}:{}".format(method.routing_key, body)) + +channel.basic_consume(callback, queue=queue_name, no_ack=True) +try: + channel.start_consuming() +except KeyboardInterrupt: + channel.stop_consuming() diff --git a/rabbitmq/receive_logs_topic.py b/rabbitmq/receive_logs_topic.py new file mode 100644 index 0000000..f246cd0 --- /dev/null +++ b/rabbitmq/receive_logs_topic.py @@ -0,0 +1,37 @@ +# coding=utf-8 +import pika +import sys + +connection = pika.BlockingConnection( + pika.ConnectionParameters(host='localhost')) +channel = connection.channel() + +channel.exchange_declare(exchange='topic_logs', + type='topic') + +result = channel.queue_declare(exclusive=True) +queue_name = result.method.queue + +binding_keys = sys.argv[1:] +if not binding_keys: + sys.stderr.write("Usage: {} [binding_keys]...\n".format(sys.argv[0])) + sys.exit() + +for binding_key in binding_keys: + channel.queue_bind(queue=queue_name, + exchange='topic_logs', + routing_key=binding_key) + +print("[*] Waitting for logs. 
To exit press Ctrl+C") + + +def callback(ch, method, properties, body): + print("[x] {}:{}".format(method.routing_key, body)) + +channel.basic_consume(callback, + queue=queue_name, + no_ack=True) +try: + channel.start_consuming() +except KeyboardInterrupt: + channel.stop_consuming() diff --git a/rabbitmq/rpc_client.py b/rabbitmq/rpc_client.py new file mode 100644 index 0000000..ca45e27 --- /dev/null +++ b/rabbitmq/rpc_client.py @@ -0,0 +1,42 @@ +# coding=utf-8 +import pika +import uuid + + +class FibonacciRpcClient(object): + + def __init__(self): + self.connection = pika.BlockingConnection( + pika.ConnectionParameters(host='localhost')) + self.channel = self.connection.channel() + + result = self.channel.queue_declare(exclusive=True) + self.callback_queue = result.method.queue + self.channel.basic_consume( + self.on_response, no_ack=True, queue=self.callback_queue) + + def on_response(self, ch, method, props, body): + if self.corr_id == props.correlation_id: + self.response = body + + def call(self, n): + self.response = None + self.corr_id = str(uuid.uuid4()) + self.channel.basic_publish(exchange='', + routing_key='rpc_queue', + properties=pika.BasicProperties + ( + reply_to=self.callback_queue, + correlation_id=self.corr_id + ), + body=str(n) + ) + + while self.response is None: + self.connection.process_data_events() + return int(self.response) + +fibonacci_rpc = FibonacciRpcClient() +print("[x] Requesting fib(40)") +response = fibonacci_rpc.call(40) +print("[.] Got {}".format(response)) diff --git a/rabbitmq/rpc_client_example.py b/rabbitmq/rpc_client_example.py new file mode 100644 index 0000000..a39fad1 --- /dev/null +++ b/rabbitmq/rpc_client_example.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +import pika +import uuid + + +class FibonacciRpcClient(object): + + def __init__(self): + self.connection = pika.BlockingConnection(pika.ConnectionParameters( + host='localhost')) + + self.channel = self.connection.channel() + + result = self.channel.queue_declare(exclusive=True) + self.callback_queue = result.method.queue + + self.channel.basic_consume(self.on_response, no_ack=True, + queue=self.callback_queue) + + def on_response(self, ch, method, props, body): + if self.corr_id == props.correlation_id: + self.response = body + + def call(self, n): + self.response = None + self.corr_id = str(uuid.uuid4()) + self.channel.basic_publish(exchange='', + routing_key='rpc_queue', + properties=pika.BasicProperties( + reply_to=self.callback_queue, + correlation_id=self.corr_id, + ), + body=str(n)) + while self.response is None: + self.connection.process_data_events() + return int(self.response) + +fibonacci_rpc = FibonacciRpcClient() + +print(" [x] Requesting fib(30)") +response = fibonacci_rpc.call(30) +print(" [.] Got %r" % response) diff --git a/rabbitmq/rpc_server.py b/rabbitmq/rpc_server.py new file mode 100644 index 0000000..a193d90 --- /dev/null +++ b/rabbitmq/rpc_server.py @@ -0,0 +1,37 @@ +# coding=utf-8 +import pika + +connection = pika.BlockingConnection( + pika.ConnectionParameters(host='localhost')) +channel = connection.channel() + +channel.queue_declare(queue='rpc_queue') + + +def fib(n): + if n == 0: + return 0 + elif n == 1: + return 1 + else: + return fib(n - 1) + fib(n - 2) + + +def on_request(ch, method, props, body): + n = int(body) + print("[.] 
fib({})".format(n)) + response = fib(n) + + ch.basic_publish(exchange='', + routing_key=props.reply_to, + properties=pika.BasicProperties( + correlation_id=props.correlation_id), + body=str(response)) + + ch.basic_ack(delivery_tag=method.delivery_tag) + +channel.basic_qos(prefetch_count=1) +channel.basic_consume(on_request, queue='rpc_queue') + +print("[x] Awaiting RPC requests") +channel.start_consuming() diff --git a/rabbitmq/rpc_server_example.py b/rabbitmq/rpc_server_example.py new file mode 100644 index 0000000..ffc6432 --- /dev/null +++ b/rabbitmq/rpc_server_example.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +import pika + +connection = pika.BlockingConnection(pika.ConnectionParameters( + host='localhost')) + +channel = connection.channel() + +channel.queue_declare(queue='rpc_queue') + + +def fib(n): + if n == 0: + return 0 + elif n == 1: + return 1 + else: + return fib(n - 1) + fib(n - 2) + + +def on_request(ch, method, props, body): + n = int(body) + + print(" [.] fib(%s)" % n) + response = fib(n) + + ch.basic_publish(exchange='', + routing_key=props.reply_to, + properties=pika.BasicProperties( + correlation_id=props.correlation_id), + body=str(response)) + ch.basic_ack(delivery_tag=method.delivery_tag) + +channel.basic_qos(prefetch_count=1) +channel.basic_consume(on_request, queue='rpc_queue') + +print(" [x] Awaiting RPC requests") +channel.start_consuming() diff --git a/rabbitmq/send.py b/rabbitmq/send.py new file mode 100644 index 0000000..f9af0cd --- /dev/null +++ b/rabbitmq/send.py @@ -0,0 +1,20 @@ +# coding=utf-8 +import sys +import pika + +connection = pika.BlockingConnection(pika.ConnectionParameters( + host='localhost')) +channel = connection.channel() +channel.queue_declare(queue='hello') # 声明 名为 hello 的 queue + +if len(sys.argv) != 1: + body = sys.argv[1] +else: + body = "Hello World!" + +channel.basic_publish(exchange='', # 默认交换机 + routing_key='hello', # queue 需要指定路由键 + body=body) + +print("[x] Sent {}.".format(body)) +connection.close() diff --git a/rabbitmq/worker.py b/rabbitmq/worker.py new file mode 100644 index 0000000..288e7b6 --- /dev/null +++ b/rabbitmq/worker.py @@ -0,0 +1,24 @@ +# coding=utf-8 +import pika +import time + +connection = pika.BlockingConnection( + pika.ConnectionParameters(host='localhost')) +channel = connection.channel() +channel.queue_declare(queue='task_queue', durable=True) +print("[*] Waitting for messages. 
To exit press Ctrl+C") + + +def callback(ch, method, properties, body): + print("[x] Received {}".format(body)) + time.sleep(body.count(b'.')) # 模拟耗时操作 + print("[x] Done") + ch.basic_ack(delivery_tag=method.delivery_tag) + +channel.basic_qos(prefetch_count=1) # 负载均衡 +channel.basic_consume(callback, queue='task_queue') + +try: + channel.start_consuming() +except KeyboardInterrupt: + channel.stop_consuming() diff --git a/test.py b/test.py new file mode 100644 index 0000000..c8a5b14 --- /dev/null +++ b/test.py @@ -0,0 +1,71 @@ +#!/usr/bin/python3 +# coding=utf-8 + +import logging +import re +import aiohttp +import asyncio +from bs4 import BeautifulSoup +from pymongo import MongoClient + + +class DouBanCrawl(): + + def __init__(self, url): + self.url = url + + async def fetch(self, url, headers): + res = await aiohttp.request('GET', url) + body = res.read() + return (await body) + + def infos_get(self, html, name=None): + soup = BeautifulSoup(html, 'lxml') + scores = soup.select('.rating_num') + scores = [score.text for score in scores] + quotes = soup.select('p.quote > span') + quotes = [quote.text for quote in quotes] + pattern = r"https://movie.douban.com/subject/\w+/" + hrefs = re.findall(pattern, str(html))[::2] + title_list = soup.select('div.pic > a') + try: + titles = [re.findall(r'alt="(.*?)"', str(title))[0] + for title in title_list] + img_links = [re.findall(r'src="(.*?)"', str(src))[0] + for src in title_list] + except IndexError: + pass + return img_links, titles, scores, quotes, hrefs + + async def save_info(self, page): + url = self.url.format(page) + # print(url) + with await sem: + html = await self.fetch(url, headers) + img_links, titles, scores, quotes, hrefs = self.infos_get(html) + for infos in zip(img_links, titles, scores, quotes, hrefs): + info = {'img': infos[0], + 'name': infos[1], + 'score': infos[2], + 'quote': infos[3], + 'href': infos[4] + } + count = coll.find({"name": infos[1]}).count() + if count == 0: + coll.insert(info) + + +if __name__ == '__main__': + url = 'https://movie.douban.com/top250?start={}&filter=' + headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \ + (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'} + client = MongoClient('localhost', 27017) + db = client.movies + coll = db.coll + douban = DouBanCrawl(url) + pages = range(0, 250, 25) + sem = asyncio.Semaphore(4) # 限制协程并发量 + loop = asyncio.get_event_loop() + f = asyncio.wait([douban.save_info(page) for page in pages]) + loop.run_until_complete(f) # %time 为Ipython 自带功能模块 + print('Done') diff --git "a/\344\270\200\344\270\252\345\274\202\346\255\245\347\210\254\350\231\253.md" "b/\344\270\200\344\270\252\345\274\202\346\255\245\347\210\254\350\231\253.md" index 5508e4b..c4484b9 100644 --- "a/\344\270\200\344\270\252\345\274\202\346\255\245\347\210\254\350\231\253.md" +++ "b/\344\270\200\344\270\252\345\274\202\346\255\245\347\210\254\350\231\253.md" @@ -1,8 +1,5 @@ # 一个异步爬虫 -标签(空格分隔): python - ---- 看了好多天的异步,今天终于算是大致理解了。模仿着写了一个异步小爬虫。以前很不理解哪里要使用异步,搞的头大。对于爬虫来说,耗时的地方是对服务器的请求,于是把对网页的请求使用异步即可! 
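
A minimal sketch of the same idea with the current aiohttp API (not the code from this post): only the network round-trip is awaited, so the requests overlap instead of blocking one another.

```python
import asyncio
import aiohttp


async def fetch(session, url):
    # Only the network I/O is awaited; parsing stays ordinary blocking Python.
    async with session.get(url) as resp:
        return await resp.text()


async def main(urls):
    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(fetch(session, u) for u in urls))


pages = asyncio.run(main(['https://movie.douban.com/top250']))
```

The crawler below follows the same shape, written against the older aiohttp interface of the time.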
```python
diff --git "a/\345\205\250\351\235\242\346\224\276\345\274\200\344\272\214\345\255\251\346\224\277\347\255\226\350\203\214\346\231\257\344\270\213\350\202\262\351\276\204\345\261\205\346\260\221\347\224\237\350\202\262\346\204\217\345\220\221\350\260\203\346\237\245.pdf" "b/\345\205\250\351\235\242\346\224\276\345\274\200\344\272\214\345\255\251\346\224\277\347\255\226\350\203\214\346\231\257\344\270\213\350\202\262\351\276\204\345\261\205\346\260\221\347\224\237\350\202\262\346\204\217\345\220\221\350\260\203\346\237\245.pdf"
deleted file mode 100644
index 1989bbd..0000000
Binary files "a/\345\205\250\351\235\242\346\224\276\345\274\200\344\272\214\345\255\251\346\224\277\347\255\226\350\203\214\346\231\257\344\270\213\350\202\262\351\276\204\345\261\205\346\260\221\347\224\237\350\202\262\346\204\217\345\220\221\350\260\203\346\237\245.pdf" and /dev/null differ
diff --git "a/\346\226\260\346\265\252\345\215\232\345\256\242\346\226\207\346\234\254\350\201\232\347\261\273.md" "b/\346\226\260\346\265\252\345\215\232\345\256\242\346\226\207\346\234\254\350\201\232\347\261\273.md"
new file mode 100644
index 0000000..2d32cb3
--- /dev/null
+++ "b/\346\226\260\346\265\252\345\215\232\345\256\242\346\226\207\346\234\254\350\201\232\347\261\273.md"
@@ -0,0 +1,268 @@
+# 新浪博客文本聚类 (Sina blog text clustering)
+
+Tags (space separated): python
+
+---
+### Foreword
+This is a program I wrote before the New Year to help a QQ friend finish a thesis report. It still needs polishing.
+
+-----
+Below are the programs used for the report, all written in `Python`. Four programs were written, as needed:
+
+1. `article_base_info.py` scrapes an article's basic information: title, link, author and publication date
+2. `article_content_gevent.py` scrapes the article bodies
+3. `text_category.py` classifies the articles
+4. `format_data.py` formats the data
+
+The code follows.
+```python
+# coding: utf-8
+'''
+Program: article_base_info.py
+1. Scrapes the basic information (title, link, author, publication date) of
+   Sina blog articles for a given number of pages.
+2. Saves the data to MongoDB.
+'''
+import re
+import concurrent.futures
+import requests
+from bs4 import BeautifulSoup as bs
+from pymongo import MongoClient
+
+
+def fetch(url):
+    res = requests.get(url)
+    res.encoding = 'gbk'
+    content = bs(res.text, 'lxml')
+    return content
+
+
+def base_info(html):
+    pattern = re.compile(r'http://blog.sina.com.cn/s/blog_.*\.html')
+    links = re.findall(pattern, str(html))
+    date_ = re.findall(r'\((\d{2,}.*)\)', str(html))
+    tle_auth = html.select('li')
+    authes = (auth.text.split(' ')[0] for auth in tle_auth)
+    titles = (title.text.split(' ')[-1] for title in tle_auth)
+    for infos in zip(links, titles, authes, date_):
+        yield infos
+
+
+def save(url):
+    html = fetch(url)
+    data = base_info(html)
+    client = MongoClient('localhost', 27017)
+    db = client.infos
+    coll = db.coll
+    for num, d in enumerate(data, 1):
+        datum = {
+            'links': d[0],
+            'title': d[1],
+            'auther': d[2],
+            'date': d[3]
+        }
+
+        count = coll.find({'links': d[0]}).count()
+        if count == 0:
+            coll.insert_one(datum)
+    print('{} is grabbed'.format(url))
+
+
+if __name__ == '__main__':
+    url = 'http://roll.blog.sina.com.cn/list/other/index_{}.shtml'
+
+    start = input('请输入开始页数, 默认为1 >> ')
+    start = int(start) if start else 1
+
+    end = input('输入结束页数, 默认为100 >> ')
+    end = int(end) if end else 100
+
+    pages = range(start, end + 1)
+    urls = [url.format(page) for page in pages]
+
+    with concurrent.futures.ProcessPoolExecutor(max_workers=6) as executor:
+        executor.map(save, urls)
+
+```
+-----------
+```python
+# -*-coding: utf-8 -*-
+'''
+Program: article_content_gevent.py
+1. Scrapes the bodies of the Sina blog articles.
+2. The article links are read from *筛选后所有博客数据.csv*, which is generated
+   by article_base_info.py.
+3. 
Some articles have been deleted by their authors (or removed for other
+   reasons), so a link may still exist while the article itself is gone.
+'''
+
+import os
+import csv
+import logging
+import requests
+import gevent
+from bs4 import BeautifulSoup as bs
+
+
+def fetch(url):
+    res = requests.get(url)
+    res.encoding = 'utf-8'
+    content = bs(res.text, 'lxml')
+    if not content:
+        logging.warning('The blog has been deleted!')
+    return content
+
+
+def content_get(html):
+    try:
+        artical = html.select('#sina_keyword_ad_area2')[0].text.strip()
+    except IndexError as e:
+        print(e)
+        logging.warning('no article body on this page')
+        artical = ' '
+    return artical
+
+
+def links_get(filename, urls=None):
+    with open(filename, 'r') as csvfile:
+        logging.info('read the file {}'.format(filename))
+        reader = csv.reader(csvfile)
+        if urls is None:
+            urls = []
+        urls = [row[0] for row in reader]
+    return urls
+
+
+def download(url):
+    html = fetch(url)
+    artical = content_get(html)
+    with open('/home/mouse/Documents/artical/{}.txt'
+              .format(url[-12:-5]), 'w') as f:
+        f.write(artical)
+        logging.info('writing the {}'.format(url))
+
+
+if __name__ == '__main__':
+    logging.basicConfig(format='%(asctime)s %(message)s',
+                        level=logging.WARNING)
+    filename = '/home/mouse/我的坚果云/董姐的论文所需/筛选后所有博客数据.csv'
+    urls = links_get(filename)
+    if not os.path.isdir('/home/mouse/Documents/artical/'):
+        os.makedirs('/home/mouse/Documents/artical/')
+    threads = [gevent.spawn(download, url) for url in urls]
+    gevent.joinall(threads)
+
+```
+---
+```python
+# coding=utf-8
+'''
+Program: text_category.py
+1. Automatically classifies the articles scraped from the Sina blogs.
+2. The classifier comes from https://github.com/2shou/TextGrocery
+3. Workflow: read the scraped article titles -> classify them -> write the
+   classified titles back out
+'''
+import os
+import csv
+from tgrocery import Grocery
+from train_txt import train_src
+
+
+def category(title_lst, cates=None):  # classify the articles
+    if cates is None:
+        cates = []
+    for title in title_lst:
+        cate = new_grocery.predict(title)
+        cates.append(cate.predicted_y)
+    return cates
+
+
+def get_artical_title(filename, title_lst=None):  # read the titles
+    if title_lst is None:
+        title_lst = []
+
+    with open(filename, 'r') as f1:
+        f1_csv = csv.reader(f1)
+        title_lst = [row[1] for row in f1_csv]
+
+    return title_lst
+
+
+def write_cated_info(filename, new_filename):  # write out the classified rows
+    titles = get_artical_title(filename)
+    categ = category(titles)
+    with open(filename, 'r') as read_file:
+        reader = csv.reader(read_file)
+        for i, row in enumerate(reader):
+            row.append(categ[i])
+            with open(new_filename, 'a+') as write_file:
+                writer = csv.writer(write_file)
+                writer.writerow(row)
+
+            print 'writing the {} item'.format(i)
+    print 'Done....................'
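+# NB: unlike the other scripts in this post, text_category.py is Python 2
+# code -- the TextGrocery library it depends on only supported Python 2.7 at
+# the time, which is why it uses print statements rather than print().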
+
+
+if __name__ == "__main__":
+    # filename and new_filename are the input and output file paths.
+    # Point them at other data sets to classify different files, as long as
+    # the data follows the same format.
+    filename = '/home/mouse/我的坚果云/董姐的论文所需/female7.csv'
+    new_filename = '/home/mouse/我的坚果云/董姐的论文所需/female7_2.csv'
+    if os.path.isfile(new_filename):
+        os.remove(new_filename)
+    grocery = Grocery('sample')
+    grocery.train(train_src)
+    grocery.save()
+    new_grocery = Grocery('sample')
+    new_grocery.load()
+    write_cated_info(filename, new_filename)
+```
+---
+```python
+# -*- coding: utf-8 -*-
+'''
+Program: format_data.py
+A helper program that formats *标题整理数据.xlsx*: the title data is converted
+to csv format.
+'''
+
+import csv
+from collections import namedtuple
+cate = ['社会冲突和问题', '毛泽东思想与政策', '政党与政府设置', '民主与法治', '民族和国际关系',
+        '媒体与言论自由', '资本主义与市场经济', '全球化和对外开放', '民生与福利',
+        '家庭冲突与伦理', '传统文化', '性与个人自由', '环境污染', '生态保护', ]
+Category = namedtuple(
+    'Category', 'social mao govm demcy nation media capi glob live home tran sex env eco')
+
+filename = '/home/mouse/我的坚果云/董姐的论文所需/标题整理数据2.csv'
+
+
+def train_text(filename, train_src=None):
+    if train_src is None:
+        train_src = []
+
+    def format_cate():
+        for emp in map(Category._make, csv.reader(open(filename, 'r'))):
+            social = (cate[0], emp.social)
+            mao = (cate[1], emp.mao)
+            govm = (cate[2], emp.govm)
+            demcy = (cate[3], emp.demcy)
+            nation = (cate[4], emp.nation)
+            media = (cate[5], emp.media)
+            capi = (cate[6], emp.capi)
+            glob = (cate[7], emp.glob)
+            live = (cate[8], emp.live)
+            home = (cate[9], emp.home)
+            tran = (cate[10], emp.tran)
+            sex = (cate[11], emp.sex)
+            env = (cate[12], emp.env)
+            eco = (cate[13], emp.eco)
+            yield social, mao, govm, demcy, nation, media, capi, glob, \
+                live, home, tran, sex, env, eco
+
+    for cat in format_cate():
+        train_src.extend(list(cat))
+
+    return train_src
+```
+All of the programs above were written by me and all ran successfully on my machine. They have not been tested on other machines or platforms, so given the various dependency and compatibility issues (and my own limited skill) I cannot promise they will run for everyone.
+
+
diff --git "a/\346\262\252\346\267\261\346\270\257\351\200\232\345\255\243\345\272\246\345\207\200\346\265\201\345\205\245.py" "b/\346\262\252\346\267\261\346\270\257\351\200\232\345\255\243\345\272\246\345\207\200\346\265\201\345\205\245.py"
new file mode 100644
index 0000000..4977df4
--- /dev/null
+++ "b/\346\262\252\346\267\261\346\270\257\351\200\232\345\255\243\345\272\246\345\207\200\346\265\201\345\205\245.py"
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+import re
+import os
+import json
+import csv
+import sys
+import pandas as pd
+import datetime
+import time
+import requests as request
+
+# Whitelist of the JSON fields to keep (named to match the later scripts)
+heads = {'HdDate', 'SCode', 'SName', 'NewPrice', 'ShareSZ_Chg_One', 'ShareSZ_Chg_Rate_One', 'LTZB_One', 'ZZB_One'}
+
+today = datetime.date.today()
+fname = str(today)+".xlsx"
+fname1 = "PPOS_POTE_"+fname
+fname2 = "PPOS_POTE_SZ_"+fname
+
+file_path = 'C:\\eastmoney'
+if not os.path.exists(file_path):
+    os.mkdir(file_path)
+os.chdir(file_path)
+
+def get_html(page):
+    # NOTE: HdDate is hard-coded to 2021-02-10 even though the output file is
+    # named after today's date; update it (or build it from `today`) when
+    # fetching a different quarter.
+    url = 'http://dcfm.eastmoney.com/EM_MutiSvcExpandInterface/api/js/get?type=HSGT20_GGTJ_SUM&token=894050c76af8597a853f5b408b759f5d&st=ShareSZ_Chg_One&sr=-1&p='+str(page)+'&ps=50&js=var%20Hyeikcqr={pages:(tp),data:(x)}&filter=(DateType=%27jd%27%20and%20HdDate=%272021-02-10%27)&rt=53772857'
+    res = request.get(url).text
+    pat = re.compile('data:(.*)}', re.S)
+    result = re.search(pat, res).group(1)
+    data = json.loads(result)
+    return data
+
+def get_one_page_stock(page):
+    rows = []
+    data = get_html(page)
+
+    for a in data:
+        row = {key: value for key, value in a.items() if key in heads}
+        rows.append(row)
+    return rows
+
+def get_all_stock():
+    all_rows = []
+    for page in range(1, 31):
+        print('\n正在下载第 %s 页表格' % page)
+        rows 
= get_one_page_stock(page) + all_rows.extend(rows) + print("下载已完成。。。。。") + return all_rows + +def get_pd(): + + all_rows = get_all_stock() + df = pd.DataFrame(all_rows) + df.columns = ['日期', '代码', '名称', '最新股价' , '市值', '市值增幅', '占流通股比', '占总股比'] + try: + df.to_excel(r'C:\eastmoney\%s' % fname) + except Exception as e: + print("请关闭文件后再试", e) + return df + +def good(f): + df1 = f.nlargest(20, '占总股比') + df2 = f.nlargest(20, "占流通股比") + df3 = f.nlargest(10, "市值") + df1_df2 = pd.merge(df1, df2, on=list(f.columns), how='inner') + df1_df2_df3 = pd.merge(df1_df2, df3, on=list(f.columns), how='inner') + try: + df1_df2.to_excel(r'C:\eastmoney\%s' % fname1) + except Exception as e: + print("请关闭文件后再试", e) + + try: + df1_df2_df3.to_excel(r'C:\eastmoney\%s' % fname2) + except Exception as e: + print("请关闭文件后再试", e) + +def main(): + start_time = time.time() + if os.path.exists(fname): + df = pd.read_excel(fname) + good(df) + else: + df = get_pd() + good(df) + + end_time = time.time() - start_time + print('文件保存在C盘eastmoney文件夹下') + print('程序耗时:{:.1f} s'.format(end_time)) + +main() + diff --git "a/\350\261\206\347\223\243\347\224\265\345\275\261Top250 \347\210\254\350\231\253.md" "b/\350\261\206\347\223\243\347\224\265\345\275\261Top250 \347\210\254\350\231\253.md" index e3e2fcd..aafb1d8 100644 --- "a/\350\261\206\347\223\243\347\224\265\345\275\261Top250 \347\210\254\350\231\253.md" +++ "b/\350\261\206\347\223\243\347\224\265\345\275\261Top250 \347\210\254\350\231\253.md" @@ -1,12 +1,95 @@ -# 豆瓣电影Top250 爬虫 +### 爬取豆瓣电影top250。 +--- +2016-11-04 更新 + +使用 mongoDB 存储 +--- +**本次更新** +抓取电影的如下简单信息 -标签(空格分隔): python +- 电影名 +- 封面 +- 评分 +- 评价人数 +- quote +- 链接 --- +```python +# coding=utf-8 + +import logging +import re +import aiohttp +import asyncio +from bs4 import BeautifulSoup +from pymongo import MongoClient + + +class DouBanCrawl(): + + def __init__(self, url): + self.url = url + + async def fetch(self, url, headers): + res = await aiohttp.request('GET', url) + body = res.read() + return (await body) + + def infos_get(self, html, name=None): + soup = BeautifulSoup(html, 'lxml') + scores = soup.select('.rating_num') + scores = [score.text for score in scores] + quotes = soup.select('p.quote > span') + quotes = [quote.text for quote in quotes] + pattern = r"https://movie.douban.com/subject/\w+/" + hrefs = re.findall(pattern, str(html))[::2] + title_list = soup.select('div.pic > a') + try: + titles = [re.findall(r'alt="(.*?)"', str(title))[0] + for title in title_list] + img_links = [re.findall(r'src="(.*?)"', str(src))[0] + for src in title_list] + except IndexError: + pass + return img_links, titles, scores, quotes, hrefs -## 爬取豆瓣电影top250。 + async def save_info(self, page): + url = self.url.format(page) + # print(url) + with await sem: + html = await self.fetch(url, headers) + img_links, titles, scores, quotes, hrefs = self.infos_get(html) + for infos in zip(img_links, titles, scores, quotes, hrefs): + info = {'img': infos[0], + 'name': infos[1], + 'score': infos[2], + 'quote': infos[3], + 'href': infos[4] + } + count = coll.find({"name": infos[1]}).count() + if count == 0: + coll.insert(info) -## 1. 
单线程版 + +if __name__ == '__main__': + url = 'https://movie.douban.com/top250?start={}&filter=' + headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \ + (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'} + client = MongoClient('localhost', 27017) + db = client.movies + coll = db.coll + douban = DouBanCrawl(url) + pages = range(0, 250, 25) + sem = asyncio.Semaphore(4) # 限制协程并发量 + loop = asyncio.get_event_loop() + f = asyncio.wait([douban.save_info(page) for page in pages]) + loop.run_until_complete(f) # %time 为Ipython 自带功能模块 + print('Done') +``` + +**以下为以前内容** +#### 1. 单线程版 ```python # -*- coding: utf-8 -*- @@ -52,7 +135,7 @@ Out: CPU times: user 1.11 s, sys: 8 ms, total: 1.12 s Wall time: 3.58 s ``` -## 2. 多线程版 +#### 2. 多线程版 ```python # -*- coding: utf-8 -*- @@ -99,7 +182,7 @@ if __name__ == '__main__': Out: CPU times: user 1.16 s, sys: 172 ms, total: 1.33 s Wall time: 1.28 s ``` -### 使用线程池 +#### 使用线程池 线程的创建和销毁是一个比较重的开销。所以,使用线程池,重用线程池中的线程! ```python @@ -115,7 +198,7 @@ Out: CPU times: user 1.23 s, sys: 152 ms, total: 1.38 s Wall time: 1.29 s ``` 再加上一个异步的吧 -## 3. 异步版 +#### 3. 异步版 此版本使用的是异步库`asyncio`和对其进行深度封装的库`aiohttp`。 ```python # coding=utf-8 @@ -159,10 +242,60 @@ if __name__ == '__main__': Out: CPU times: user 984 ms, sys: 28 ms, total: 1.01 s Wall time: 1.67 s ``` +#### 4. 使用下 Gevent 看看效果如何。 +```python +# coding=utf-8 + +import re +import requests +import gevent +from gevent.pool import Pool +from bs4 import BeautifulSoup as bs + + +def fetch(url): + s = requests.Session() + s.headers.update({"user-agent": user_agent}) + return s.get(url) + + +def title_get(url): + try: + result = fetch(url) + except requests.exceptions.RequestException: + return False + html = bs(result.text, 'lxml') + title_list = html.select('div.pic > a > img') + ''' + title_list中的元素格式如下 e.g: +