1. Array - Sliding Window
Sliding Window 首先一定是处理(连续) subarray或substring的问题(不连续的subsequence);
其次,只能用于处理 "window是否满足条件" 与 "window长度“ 直接相关的问题,也就是说锁死某个index作为window的末尾,那么要么window的长度是确定的(implement-strstr ) ,要么就是window越长越满足条件 (Minimum Window Substring ) 或者window越长越不满足条件 ( longest-substring-with-at-most-k-distinct-characters/) (前者求 shortest window,后者求 longest window)。
第二条原则主要是为了和用dynamic programming求subarray的问题区别开,比如:
# 正整数数组, 求sum为target的subarray
# => sliding window
def subarray_sum_positive(arr, target):
window_sum = 0
start_idx = 0
for end_idx in range(len(arr)):
window_sum += arr[end_idx]
while window_sum > target:
window_sum -= arr[start_idx]
start_idx += 1
if window_sum == target:
return arr[start_idx:(end_idx+1)]
return None
# 有负数的整数数组, 求sum为target的subarray
# => prefix sum (dynamic programming)
def subarray_sum(arr, target):
sum_idx_map = {0: -1}
sum = 0
for idx, ele in enumerate(arr):
sum += ele
if sum - target in sum_idx_map:
start_idx = sum_idx_map[sum - target] + 1
return arr[start_idx:(idx+1)]
sum_idx_map[sum] = idx
return None
Sliding Window可以用两个全局pointer start_idx
, end_idx
分别记录段首段尾。以start_idx=0
为起点,end_idx
向右扩张,然后在当前end_idx
为结尾的前提下(锁死window末尾),收缩 start_idx
找到满足条件的最短window或者最长window,与全局的window最值比较。 (因为两个pointer均不会后退所以可以保证时间复杂度不超过)
# [Template]
import collections
def slide_window(s):
if len(s) == 0:
return ''
longest = '' # the global state
# window state variables
start_idx = 0
window_counter = collections.Counter()
for end_idx in range(len(str)):
char = s[end_idx]
window_counter[char] += 1
# shrink window until it meets requirement
while len(window_counter) > k:
start_char = s[start_idx]
window_counter[start_char] += 1
if window_counter[start_char] == 0:
window_counter.pop(start_char)
start_idx += 1
if end_idx - start_idx + 1 > len(longest): # and meets requirement
longest = s[start_idx:(end_idx + 1)]
return longest
e.g.1 Given input -> "I have 36 books, 40 pens2."; output -> "I evah 36 skoob, 40 2snep." e.g.2 Compress the string "aabcccccaaa" to "a2b1c5a3"
e.g.3 https://leetcode.com/problems/find-all-anagrams-in-a-string/.
import collections
# 1. substing 2. window长度确定
class Solution:
def findAnagrams(self, s: str, pattern: str) -> List[int]:
if len(s) == 0:
return ''
match_indices = [] # the global state
pattern_counter = collections.Counter()
for char in pattern:
pattern_counter[char] += 1
# window state variables
start_idx = 0
window_counter = collections.Counter()
for end_idx in range(len(s)):
char = s[end_idx]
window_counter[char] += 1
# shrink window until it meets requirement
while start_idx < len(s) and window_counter[s[start_idx]] > pattern_counter[s[start_idx]]:
window_counter[s[start_idx]] -= 1
if window_counter[s[start_idx]] == 0:
window_counter.pop(s[start_idx])
start_idx += 1
# and meets requirement
if window_counter == pattern_counter:
match_indices.append(start_idx)
return match_indices
e.g.4 https://leetcode.com/problems/longest-substring-with-at-most-k-distinct-characters/
import collections
class Solution:
def lengthOfLongestSubstringKDistinct(self, s: str, k: int) -> int:
if len(s) == 0:
return 0
longest_length = 0 # the global state
# window state variables
start_idx = 0
window_counter = collections.Counter()
for end_idx in range(len(s)):
char = s[end_idx]
window_counter[char] += 1
# shrink window until it meets requirement
while len(window_counter) > k:
start_char = s[start_idx]
window_counter[start_char] -= 1
if window_counter[start_char] == 0:
window_counter.pop(start_char)
start_idx += 1
if end_idx - start_idx + 1 > longest_length: # and meets requirement
longest_length = end_idx - start_idx + 1
return longest_length
e.g.5 https://leetcode.com/problems/minimum-window-substring/
import typing
from collections import Counter
class Solution:
def minWindow(self, s: str, pattern: str) -> str:
if len(s) == 0:
return ''
shortest = None # the global state
pattern_counter = Counter()
for char in pattern:
pattern_counter[char] += 1
# window state variables
start_idx = 0
# 1. window state用什么记录
window_counter = Counter()
effective_count = 0
for end_idx in range(len(s)):
char = s[end_idx]
window_counter[char] += 1
if window_counter[char] <= pattern_counter[char]:
effective_count += 1
# shrink window until it meets requirement
# 2. shrink的条件是什么
while start_idx < len(s) and window_counter[s[start_idx]] > pattern_counter[s[start_idx]]:
start_char = s[start_idx]
window_counter[start_char] -= 1
if window_counter[start_char] == 0:
window_counter.pop(start_char)
start_idx += 1
# 3. window需要满足的条件是什么
if effective_count == len(pattern): # and meets requirement
if shortest is None or (end_idx - start_idx + 1) < len(shortest):
shortest = s[start_idx:(end_idx + 1)]
return shortest if shortest is not None else ''
e.g.5 典型的将复杂问题划归为简单问题的思维过程,就是首先用sliding window,而每一段的处理是1.4.1的统计问题。优化:因为段到段之间的transition不需要重新计算hash table,所以可以只计算增量来改善时间复杂度。对于Sliding Window问题,套用模板之外,我们只需要考虑三个很小的问题:
1. window state用什么记录 2. shrink的条件是什么 3. window需要满足的条件是什么
e.g.6 https://leetcode.com/problems/substring-with-concatenation-of-all-words/
import collections
from typing import List
"""
Input: s = "barfoothefoobarman", words = ["foo","bar"]
Output: [0,9]
"""
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
if len(s) == 0 or len(words) == 0:
return []
match_indices = []
pattern_counter = collections.Counter()
for word in words:
pattern_counter[word] += 1
word_len = len(words[0])
for start_idx in range(word_len):
# window state variables
window_counter = collections.Counter()
effective_count = 0
end_idx = start_idx
while end_idx < len(s):
end_word = s[end_idx:(end_idx + word_len)]
window_counter[end_word] += 1
if window_counter[end_word] <= pattern_counter[end_word]:
effective_count += 1
# shrink window until it meets requirement
while start_idx < len(s) and window_counter[s[start_idx:(start_idx + word_len)]] > pattern_counter[s[start_idx:(start_idx + word_len)]]:
start_word = s[start_idx:(start_idx + word_len)]
window_counter[start_word] -= 1
start_idx += word_len
# and meets requirement
if effective_count == len(words) and end_idx - start_idx + word_len == word_len * len(words):
match_indices.append(start_idx)
end_idx += word_len
return match_indices
Last updated
Was this helpful?