2020-04
21

PK我自己

By xrspook @ 19:04:38 归类于: 扮IT

用了几分钟时间,写了个用字典法查找10万单词的词汇表回文词的脚本。字典法肯定要比列表二分法快,但到底快多少呢?实测大概10倍。相比之下,字典法语言实在简练太多。二分法的函数还得考虑递归和起点终点神马,字典法一个in杀到底。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# import time
# def in_bisect(library, first, last, myword): # 二分法搜索,10万数据查询最多只需不到20步
#     if first > last: # 这是一句拯救了我的条件
#         return -1
#     else:
#         mid = (first + last)//2
#         if myword == library[mid]:
#             return mid
#         elif library[mid] > myword:
#             return in_bisect(library, first, mid-1, myword)
#         else:
#             return in_bisect(library, mid+1, last, myword)
# j = 0
# count = 0
# library = []
# fin = open('words.txt')
# for line in fin:
#     word = line.strip()
#     library.append(word)
# library.sort()
# start = time.time()
# for i in range(len(library)-1): # 二分法搜索 
#     j = in_bisect(library, 0, len(library)-1, library[i][::-1])
#     if j > -1 and library[i] < library[j]:
#         print(library[i], library[j])
#         count += 1
# print(count)
# end = time.time()
# print(end - start)
# 397, 1.2810001373291016 # 二分法搜索
 
import time
def set_dict(fin): # dictionary-based lookup table
    """Return a dict whose keys are the whitespace-stripped lines of *fin*.

    Every key maps to the placeholder value 0; the dict is only used for
    fast membership tests, so the values carry no meaning.
    """
    stripped_lines = (raw.strip() for raw in fin)
    return dict.fromkeys(stripped_lines, 0)
# Find every "reverse pair" in words.txt using dictionary membership tests
# and report how many pairs were found and how long the search took.
count = 0
with open('words.txt') as fin: # context manager closes the file once the dict is built
    start = time.time() # timing covers building the dict and the search
    mydict = set_dict(fin)
for word in mydict:
    flipped = word[::-1] # reverse once instead of re-slicing for every use
    # `word < flipped` skips self-reversing words and reports each pair only once
    if word < flipped and flipped in mydict:
        print(word, flipped)
        count += 1
print(count)
end = time.time()
print(end - start)
# 397, 0.14300012588500977 # dictionary lookup
2020-04
19

秒杀的感觉真爽!

By xrspook @ 20:06:17 归类于: 扮IT

配合我的二分法搜索,10万单词找出397对回文词,我只需1.7秒。list.index()需要291秒,期间如果不输出单词,你绝对认为自己的电脑卡死了!参考答案用了70秒,而且搜出了885对,其中91对准确来说是91个,那些词自己跟自己回文,自己跟自己根本算不上两个词好吗!另外多出的397对是因为A词和B词算一对,B词和A词他们又输出了一遍。参考答案的语句很精炼,但特殊情况没有处理好。

赢了参考答案,真爽!!!

练习11:两个词如果互为逆序,就称它们是『翻转配对』。写一个函数来找一下在这个词汇表中所有这样的词对。

Exercise 11: Two words are a “reverse pair” if each is the reverse of the other. Write a program that finds all the reverse pairs in the word list. Solution: http://thinkpython2.com/code/reverse_pair.py.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
import time
def in_bisect(library, first, last, myword): # binary search; ~100k entries need fewer than 20 probes
    """Search the sorted list *library* for *myword* between two indices.

    *first* and *last* are the inclusive bounds of the search window.
    Returns the index where *myword* was found, or -1 if it is absent.
    """
    while first <= last: # an empty window means the word is not present
        mid = (first + last)//2
        probe = library[mid]
        if myword == probe:
            return mid
        if probe > myword:
            last = mid - 1 # continue in the lower half
        else:
            first = mid + 1 # continue in the upper half
    return -1
# Find every "reverse pair" in words.txt by binary-searching the sorted
# word list for each word's reversal, then report the count and elapsed time.
count = 0
with open('words.txt') as fin: # context manager closes the file after reading
    library = [line.strip() for line in fin]
library.sort() # in_bisect requires a sorted list
last = len(library) - 1 # inclusive upper bound, computed once
start = time.time()
for word in library: # binary search
    j = in_bisect(library, 0, last, word[::-1])
    # `word < library[j]` skips self-reversing words and reports each pair only once
    if j > -1 and word < library[j]:
        print(word, library[j])
        count += 1
# Alternative kept for comparison: list.index() search (far slower, see timings below)
# for i in range(len(library)-1):
#     if library[i][::-1] in library:
#         j = library.index(library[i][::-1], 0, len(library)-1)
#         if library[i] < library[j]:
#             print(library[i], library[j])
#             j = 0
#             count += 1
print(count)
end = time.time()
print(end - start)
# abut tuba
# ad da
# ados soda
# agar raga
# agas saga
# agenes senega
# ah ha
# aider redia
# airts stria
# ajar raja
# alif fila
# am ma
# amen nema
# amis sima
# an na
# anger regna
# animal lamina
# animes semina
# anon nona
# ante etna
# are era
# ares sera
# aril lira
# arris sirra
# arum mura
# at ta
# ate eta
# ates seta
# auks skua
# avid diva
# avo ova
# ay ya
# bad dab
# bag gab
# bal lab
# bals slab
# ban nab
# bard drab
# bas sab
# bat tab
# bats stab
# bed deb
# ben neb
# bid dib
# big gib
# bin nib
# bins snib
# bird drib
# bis sib
# bog gob
# bos sob
# bots stob
# bows swob
# brad darb
# brag garb
# bud dub
# bun nub
# buns snub
# bur rub
# burd drub
# burg grub
# bus sub
# but tub
# buts stub
# cam mac
# cap pac
# cares serac
# cod doc
# cram marc
# cud duc
# dag gad
# dah had
# dahs shad
# dam mad
# dap pad
# dart trad
# daw wad
# debut tubed
# decal laced
# dedal laded
# deem meed
# deep peed
# deeps speed
# deer reed
# dees seed
# defer refed
# degami imaged
# deifier reified
# deil lied
# deke eked
# del led
# delf fled
# deliver reviled
# dels sled
# demit timed
# denier reined
# denies seined
# denim mined
# dens sned
# depot toped
# depots stoped
# derat tared
# derats stared
# dessert tressed
# desserts stressed
# devas saved
# devil lived
# dew wed
# dewans snawed
# dexes sexed
# dial laid
# dialer relaid
# diaper repaid
# dig gid
# dim mid
# dinar ranid
# diols sloid
# dirts strid
# do od
# dog god
# dom mod
# don nod
# doom mood
# door rood
# dor rod
# dormin nimrod
# dorp prod
# dos sod
# dot tod
# drail liard
# draw ward
# drawer reward
# draws sward
# dray yard
# dual laud
# ducs scud
# duel leud
# duo oud
# dup pud
# dups spud
# eat tae
# edile elide
# edit tide
# eel lee
# eh he
# elides sedile
# em me
# emes seme
# emir rime
# emit time
# emits stime
# enol lone
# er re
# ergo ogre
# eros sore
# ervil livre
# etas sate
# even neve
# evil live
# eviler relive
# fer ref
# fires serif
# flog golf
# flow wolf
# fool loof
# gal lag
# gals slag
# gam mag
# gan nag
# gar rag
# gas sag
# gat tag
# gats stag
# gel leg
# gelder redleg
# get teg
# gip pig
# girt trig
# gnar rang
# gnat tang
# gnats stang
# gnaws swang
# gnus sung
# got tog
# gul lug
# gulp plug
# guls slug
# gum mug
# gums smug
# guns snug
# gut tug
# habus subah
# hahs shah
# hales selah
# hap pah
# hay yah
# hey yeh
# ho oh
# hoop pooh
# hop poh
# is si
# it ti
# jar raj
# kay yak
# keel leek
# keels sleek
# keep peek
# keets steek
# kips spik
# knaps spank
# knar rank
# knits stink
# lager regal
# lair rial
# lap pal
# lares seral
# larum mural
# las sal
# leer reel
# lees seel
# leets steel
# leper repel
# lever revel
# levins snivel
# liar rail
# lin nil
# lion noil
# lit til
# lobo obol
# loom mool
# loons snool
# loop pool
# loops spool
# loot tool
# looter retool
# loots stool
# lop pol
# lotos sotol
# macs scam
# maes seam
# map pam
# mar ram
# marcs scram
# mart tram
# mat tam
# maws swam
# may yam
# meet teem
# meter retem
# mho ohm
# mils slim
# mir rim
# mis sim
# mon nom
# moor room
# moot toom
# mot tom
# mures serum
# mus sum
# muts stum
# namer reman
# nap pan
# naps span
# neep peen
# net ten
# neves seven
# new wen
# nip pin
# nips spin
# nit tin
# no on
# nolos solon
# nos son
# not ton
# notes seton
# now won
# nu un
# nus sun
# nut tun
# nuts stun
# oat tao
# oohs shoo
# oot too
# os so
# ow wo
# pacer recap
# pals slap
# pans snap
# par rap
# part trap
# parts strap
# pas sap
# pat tap
# paw wap
# paws swap
# pay yap
# peels sleep
# pees seep
# per rep
# pets step
# pins snip
# pis sip
# pit tip
# pols slop
# pools sloop
# poons snoop
# port trop
# ports strop
# pot top
# pots stop
# pow wop
# pows swop
# prat tarp
# pupils slipup
# puris sirup
# pus sup
# put tup
# raps spar
# rat tar
# rats star
# raw war
# ray yar
# rebus suber
# rebut tuber
# recaps spacer
# redes seder
# redips spider
# redraw warder
# redrawer rewarder
# rees seer
# reflet telfer
# reflow wolfer
# reknit tinker
# reknits stinker
# relit tiler
# remeet teemer
# remit timer
# rennet tenner
# repins sniper
# res ser
# rot tor
# sallets stellas
# saps spas
# sat tas
# saw was
# scares seracs
# secret terces
# seeks skees
# selahs shales
# sirs sris
# sit tis
# six xis
# skeets steeks
# skips spiks
# sleeps speels
# sleets steels
# slit tils
# sloops spools
# smart trams
# smuts stums
# snaps spans
# snaw wans
# snaws swans
# snips spins
# snit tins
# snoops spoons
# snoot toons
# snot tons
# snow wons
# sow wos
# spat taps
# spay yaps
# spirt trips
# spirts strips
# spit tips
# sports strops
# spot tops
# spots stops
# sprat tarps
# sprits stirps
# staw wats
# stew wets
# stow wots
# stows swots
# straw warts
# strow worts
# struts sturts
# swat taws
# sway yaws
# swot tows
# tav vat
# taw wat
# tew wet
# tort trot
# tow wot
# trow wort
# way yaw
# tort trot 
# tow wot
# trow wort
# way yaw
# 397, 291.1146504878998 # list.index()搜索
# 397, 1.7120981216430664 # 二分法搜索
# 885, 70.3680248260498 # 参考答案运行结果
2020-04
18

死磕二分法搜索

By xrspook @ 15:00:07 归类于: 扮IT

我是看着题目的中文版做题的

练习10:要检查一个单词是不是在上面这个词汇列表里,你可以使用 in 运算符,但可能会很慢,因为这个 in 运算符要从头到尾来搜索整个词汇表。我们知道这些单词是按照字母表顺序组织的,所以我们可以加速一下,用一种对折搜索(也叫做二元搜索),这个过程就和你在现实中用字典来查单词差不多。你在中间部分开始,看看这个要搜索的词汇是不是在中间位置的前面。如果在前面,就又对前半部分取中间,继续这样来找。当然了,不在前半部分,就去后半部分找了,思路是这样的。不论怎样,每次都会把搜索范围缩减到一半。如果词表包含了113809个单词,最多就是17步就能找到单词,或者能确定单词不在词汇表中。那么问题来了,写一个函数,名为 in_bisect,接收一个整理过的按照字母顺序排列的列表,以及一个目标值,在列表中查找这个值,找到了就返回索引位置,找不到就返回空。

做到死去活来:词语在词汇表里时索引正确,不在词汇表里时程序却会疯掉。这时候我不得不去看答案,看到答案后傻眼了:答案对单词的判断只有True和False。再去找原题,我那个去,题目改了!不要求索引了好吗!

Exercise 10: To check whether a word is in the word list, you could use the in operator, but it would be slow because it searches through the words in order. Because the words are in alphabetical order, we can speed things up with a bisection search (also known as binary search), which is similar to what you do when you look a word up in the dictionary (the book, not the data structure). You start in the middle and check to see whether the word you are looking for comes before the word in the middle of the list. If so, you search the first half of the list the same way. Otherwise you search the second half. Either way, you cut the remaining search space in half. If the word list has 113,809 words, it will take about 17 steps to find the word or conclude that it’s not there. Write a function called in_bisect that takes a sorted list and a target value and returns True if the word is in the list and False if it’s not. Or you could read the documentation of the bisect module and use that! Solution: http://thinkpython2.com/code/inlist.py.

又纠结一番后我终于写出了一句“first > last”返回例外情况,终于,世界被拯救了!记录索引和不记录索引很不一样啊,按照参考答案的解法,i即便返回也永远是1,索引无能。纠结是有好处的,让我明白到二分法搜索有多么的高效,简直甩while循环几十条街,但如果真索引的话,估计我会很懒地直接用list.index(),虽然用之前必须用in遍历列表,判断是否存在。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import time
def in_bisect(library, first, last, myword): # binary search; ~100k entries need fewer than 20 probes
    """Locate *myword* in the sorted list *library* by repeated halving.

    *first* and *last* are the inclusive index bounds to search.
    Returns the matching index, or None when the word is not present.
    """
    lo, hi = first, last
    while not lo > hi: # stop as soon as the window is empty
        mid = (lo + hi)//2
        if myword == library[mid]:
            return mid
        if library[mid] > myword:
            hi = mid - 1 # target sorts before the midpoint
        else:
            lo = mid + 1 # target sorts after the midpoint
    return None
# Look up a single word in the sorted word list and report its index
# (or that it is missing), together with the elapsed search time.
myword = 'zoo' # input('myword is: ')
i = 0 # only needed by the commented-out alternatives below
with open('words.txt') as fin: # context manager closes the file after reading
    library = [line.strip() for line in fin]
library.sort() # in_bisect requires a sorted list
start = time.time()

# Alternative 1: plain while loop (the author's first idea).
# NOTE: uses 0 as the "not found" marker, so a word at index 0 is misreported.
# j = 0
# while i < len(library) - 1:
#     if myword == library[i]:
#         j = i
#         break
#     i += 1
# if j == 0:
#     print('myword is not in library')
# else:
#     print('index =', j)

# Alternative 2: the list's built-in index(), which first needs an `in`
# check to confirm the word exists. Same index-0 caveat as above.
# if myword in library:
#     i = library.index(myword, 0, len(library)-1)
# if i == 0:
#     print('myword is not in library')
# else:
#     print('index =', i)

# The upper bound is inclusive, so it must be len(library) - 1; passing
# len(library) makes in_bisect index past the end for words that sort
# after the last entry. Search once and reuse the result.
index = in_bisect(library, 0, len(library) - 1, myword)
if index is None:
    print('myword is not in library')
else:
    print('index =', index)
end = time.time()
print(end-start)
# myword is not in library while 0.07   index 0.003  bisect 0.001
# apple 4450               while 0.003  index 0.001  bisect 0.001
# zoo 113707               while 0.07   index 0.005  bisect 0.001
# while,index和bisect没有对比就没有伤害
© 2004 - 2024 我的天 | Theme by xrspook | Power by WordPress