#-*- coding:utf-8 -*-
import time
# Method 1: compare the two lists using a dictionary lookup
def getDiff1(arr1, arr2):
    """Diff two sequences with a dictionary lookup and report the result.

    Elements are matched by their ``str()`` form, so values such as ``1``
    and ``'1'`` are treated as the same element. The function prints the
    start/end timestamps, how many elements each input has that the other
    lacks, and the elapsed time.

    Args:
        arr1: First iterable of elements.
        arr2: Second iterable of elements.

    Returns:
        A tuple ``(arr_more1, arr_more2)`` of lists.
        ``arr_more1`` holds the elements of ``arr1`` that have no match in
        ``arr2`` (one entry per distinct string form, in first-seen order).
        ``arr_more2`` holds the elements of ``arr2`` that have no match in
        ``arr1`` (input order, duplicates kept).
    """
    start_time = time.time()
    print('1_start:', start_time)

    # Map each string key to the first original element that produced it,
    # so the reported elements keep their original type.
    originals = {}
    for item in arr1:
        originals.setdefault(str(item), item)

    matched_keys = set()
    arr_more2 = []
    for item in arr2:
        key = str(item)  # computed once per element
        if key in originals:
            matched_keys.add(key)
        else:
            arr_more2.append(item)

    arr_more1 = [
        item for key, item in originals.items() if key not in matched_keys
    ]

    print('arr1比arr2多的内容为:', len(arr_more1))
    print('arr2比arr1多的内容为:', len(arr_more2))
    end_time = time.time()
    print('1_end:', end_time)
    print('方法1_比对用时为', end_time - start_time)
    return arr_more1, arr_more2
# Method 2: compare the two lists using set operations
def getDiff2(arr1, arr2):
    """Diff two sequences with set operations and report the result.

    Both inputs are converted to sets, so their elements must be hashable
    and duplicates are collapsed. The function prints the start/end
    timestamps, how many distinct elements each input has that the other
    lacks, and the elapsed time.

    Args:
        arr1: First iterable of hashable elements.
        arr2: Second iterable of hashable elements.

    Returns:
        A tuple ``(set_more1, set_more2)`` of sets: the elements found only
        in ``arr1`` and the elements found only in ``arr2``.
    """
    start_time = time.time()
    print('2_start:', start_time)

    # Convert the lists to sets.
    set_1 = set(arr1)
    set_2 = set(arr2)

    # A direct set difference gives the same result as subtracting the
    # intersection, without building the intermediate set.
    set_more1 = set_1 - set_2
    set_more2 = set_2 - set_1

    print('arr1比arr2多的内容为:', len(set_more1))
    print('arr2比arr1多的内容为:', len(set_more2))
    end_time = time.time()
    print('2_end:', end_time)
    print('方法2_比对用时为', end_time - start_time)
    return set_more1, set_more2
# Test
# Initialize the test data: 5 million records per list
if __name__ == '__main__':
    # Benchmark both diff implementations on the same data. The guard keeps
    # this expensive run from executing when the module is merely imported.
    #
    # Each list holds exactly 5,000,000 records: arr1 contains multiples of
    # 2 and arr2 multiples of 3, so the only values they can share are
    # multiples of 6.
    record_count = 5000000
    arr1 = [i * 2 for i in range(record_count)]
    arr2 = [i * 3 for i in range(record_count)]

    print('arr1的长度为:', len(arr1))
    print('arr2的长度为:', len(arr2))
    print('+' * 30)
    getDiff1(arr1, arr2)
    print('-' * 30)
    getDiff2(arr1, arr2)