Alien Dictionary (hard)
Dry-run templates
The following is the implementation of the solution provided for the Alien Dictionary (hard) problem:
from collections import dequedef find_order(words):if len(words) == 0:return ""# a. Initialize the graphinDegree = {} # count of incoming edgesgraph = {} # adjacency list graphfor word in words:for character in word:inDegree[character] = 0graph[character] = []# b. Build the graphfor i in range(0, len(words)-1):# find ordering of characters from adjacent wordsw1, w2 = words[i], words[i + 1]for j in range(0, min(len(w1), len(w2))):parent, child = w1[j], w2[j]if parent != child: # if the two characters are different# put the child into it's parent's listgraph[parent].append(child)inDegree[child] += 1 # increment child's inDegreebreak # only the first different character between the two words will help us find the order# c. Find all sources i.e., all vertices with 0 in-degreessources = deque()for key in inDegree:if inDegree[key] == 0:sources.append(key)# d. For each source, add it to the sortedOrder and subtract one from all of its children's in-degrees# if a child's in-degree becomes zero, add it to the sources queuesortedOrder = []while sources:vertex = sources.popleft()sortedOrder.append(vertex)for child in graph[vertex]: # get the node's children to decrement their in-degreesinDegree[child] -= 1if inDegree[child] == 0:sources.append(child)# if sortedOrder doesn't contain all characters, there is a cyclic dependency between characters, therefore, we# will not be able to find the correct ordering of the charactersif len(sortedOrder) != len(inDegree):return ""return ''.join(sortedOrder)def main():print("Character order: " + find_order(["ba", "bc", "ac", "cab"]))print("Character order: " + find_order(["cab", "aaa", "aab"]))print("Character order: " + find_order(["ywx", "wz", "xww", "xz", "zyy", "zwz"]))main()
Sample input #1
Words: ["ba", "bc", "ac", "cab"]
Note: The column Outer loops represents the loops in our algorithm and their iteration number. For example, column entry
while: 2
represents the second iteration of thewhile
loop.Since, there are 3 loops, the column Line number specifies the loop we’re considering. The column Inner loop iteration represents the nested
for
loops (line 20 - line 26 and line 40 - line 43).
Line number | Outer loops | Inner loop iteration | graph | w1 | w2 | parent | child | inDegree[child] | inDegree | sources | vertex | sortedOrder |
5-14 | {'b': [], 'c': [], 'a': []} | {'b': 0, 'c': 0, 'a': 0} | ||||||||||
17-26 | for: 1 | 1 | {'b': [], 'c': [], 'a': []} | ba | bc | b | b | {'b': 0, 'c': 0, 'a': 0} | ||||
17-26 | for: 1 | 2 | {'b': [], 'c': [], 'a': ['c']} | ba | bc | a | c | {'b': 0, 'c': 1, 'a': 0} | ||||
17-26 | for: 2 | 1 | {'b': ['a'], 'c': [], 'a': ['c']} | bc | ac | b | a | {'b': 0, 'c': 1, 'a': 1} | ||||
17-26 | for: 3 | 1 | {'b': ['a'], 'c': [], 'a': ['c', 'c']} | ac | cab | a | c | {'b': 0, 'c': 2, 'a': 1} | ||||
29-32 | for: 1 | {'b': ['a'], 'c': [], 'a': ['c', 'c']} | {'b': 0, 'c': 2, 'a': 1} | ['b'] | ||||||||
29-32 | for: 2 | {'b': ['a'], 'c': [], 'a': ['c', 'c']} | {'b': 0, 'c': 2, 'a': 1} | ['b'] | ||||||||
29-32 | for: 3 | {'b': ['a'], 'c': [], 'a': ['c', 'c']} | {'b': 0, 'c': 2, 'a': 1} | ['b'] | ||||||||
37-43 | while: 1 | 1 | {'b': ['a'], 'c': [], 'a': ['c', 'c']} | a | 0 | {'b': 0, 'c': 2, 'a': 0} | ['a'] | b | ['b'] | |||
37-43 | while: 2 | 1 | {'b': ['a'], 'c': [], 'a': ['c', 'c']} | c | 1 | {'b': 0, 'c': 1, 'a': 0} | [ ] | a | ['b', 'a'] | |||
37-43 | while: 2 | 2 | {'b': ['a'], 'c': [], 'a': ['c', 'c']} | c | 0 | {'b': 0, 'c': 0, 'a': 0} | ['c'] | a | ['b', 'a'] | |||
37-43 | while: 3 | 1 | {'b': ['a'], 'c': [], 'a': ['c', 'c']} | {'b': 0, 'c': 0, 'a': 0} | [ ] | c | ['b', 'a', 'c'] |
Sample input #2
Students make be encouraged to fill the dry-run table with the following input:
Words: ["cab", "aaa", "aab"]
Line number | Outer loops | Inner loop iteration | graph | w1 | w2 | parent | child | inDegree[child] | inDegree | sources | vertex | sortedOrder |
5-14 | {'c': [], 'b': [], 'a': []} | {'c': 0, 'b': 0, 'a': 0} | ||||||||||
17-26 | for: 1 | 1 | {'c': ['a'], 'b': [], 'a': []} | cab | aaa | c | a | {'c': 0, 'b': 0, 'a': 1} | ||||
17-26 | for: 2 | 1 | {'c': ['a'], 'b': [], 'a': []} | aaa | aab | a | a | {'c': 0, 'b': 0, 'a': 1} | ||||
17-26 | for: 2 | 2 | {'c': ['a'], 'b': [], 'a': []} | aaa | aab | a | a | {'c': 0, 'b': 0, 'a': 1} | ||||
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
Step-by-step solution construction
In this problem, characters in the words are considered as vertices. The lexical order represents the edges. For example, if a
comes before b
, this means that there is an edge from a
to b
: a -> b
.
The first step is to initialize the graph with vertices and empty adjacency lists.
We also maintain a dictionary inDegree
that will store the number of incoming edges for each vertex. However, in this step, we’ll initialize it to 0.
from collections import dequedef find_order(words):if len(words) == 0:return ""# a. Initialize the graphinDegree = {} # count of incoming edgesgraph = {} # adjacency list graphprint("Initializing graph and inDegree")j = 1for word in words:print("\tLoop iteration: ", j, sep = "")j+=1k = 1print("\t\tword: ", word)for character in word:print("\t\tInner loop iteration: ", k, sep = "")k+=1print("\t\t\tcharacter: ", character, sep = "")print("\t\t\tAdding vertex '", character, "' to inDegree and the graph", sep = "")inDegree[character] = 0graph[character] = []print("\t\t\tinDegree: ", inDegree, sep = "")print("\t\t\tgraph: ", graph, sep = "")print("")sortedOrder = []return ''.join(sortedOrder)def main():inputWords = [["ba", "bc", "ac", "cab"], ["cab", "aaa", "aab"], ["ywx", "wz", "xww", "xz", "zyy", "zwz"]]for i in inputWords:print("Character order: " + find_order(i))print("-"*100+"\n")main()
The next step is building the graph by finding the ordering of characters from adjacent words.
We’ll iterate over the characters of the two words and compare them. If they’re different, there’s an edge from the character of the first word to the character of the second word.
For example, consider the words one
and two
. Comparing the first characters of both words o
and t
, since they’re different, we can assume o
comes before t
, and hence, there’s an edge from o
to t
: o -> t
. This concept is also explained in the Find dictionary problem explained in the Introduction lesson.
from collections import dequedef find_order(words):if len(words) == 0:return ""# a. Initialize the graphinDegree = {} # count of incoming edgesgraph = {} # adjacency list graphprint("Initializing graph and inDegree")for word in words:for character in word:inDegree[character] = 0graph[character] = []print("\tinDegree: ", inDegree, sep = "")print("\tgraph: ", graph, sep = "")print("")print("Building the graph")k = 1for i in range(0, len(words)-1):print("\tLoop iteration: ", k, sep = "")k+=1# find ordering of characters from adjacent wordsw1, w2 = words[i], words[i + 1]print("\t\tw1: ", w1, sep = "")print("\t\tw2: ", w2, sep = "")m = 1for j in range(0, min(len(w1), len(w2))):print("\t\tInner loop iteration: ", m, sep = "")m+=1sep = ""parent, child = w1[j], w2[j]print("\t\t\tparent: w1[",j,"] = ", parent, sep = "")print("\t\t\tchild: w2[",j,"] = ", child, sep = "")if parent != child: # if the two characters are different# put the child into it's parent's listprint("\t\t\tSince ", parent, " != ", child, ", from words \"", w1, "\" and \"", w2, "\", we can conclude that '", parent ,"' comes before '", child,"'", sep = "")print("\t\t\t\tAdding ", child, " to the adjacency list of ", parent, sep = "")graph[parent].append(child)print("\t\t\t\tgraph: ", graph, sep = "")inDegree[child] += 1 # increment child's inDegreeprint("\t\t\t\tIncrementing inDegree of ", child, ": ", inDegree[child]-1, " + 1 = ", inDegree[child], sep = "")print("\t\t\t\tinDegree: ", inDegree, sep = "")break # only the first different character between the two words will help us find the orderelse:print("\t\t\tSince parent = child: ", parent, " = ", child, " --> we move to the next character", sep = "")print("")print("")sortedOrder = []return ''.join(sortedOrder)def main():inputWords = [["ba", "bc", "ac", "cab"], ["cab", "aaa", "aab"], ["ywx", "wz", "xww", "xz", "zyy", "zwz"]]for i in inputWords:print("Character order: " + find_order(i))print("-"*100+"\n")main()
Since graph
and inDegree
are now set up, the next step is to find the sources, that is, vertices with inDegree = 0
.
from collections import dequedef find_order(words):if len(words) == 0:return ""# a. Initialize the graphinDegree = {} # count of incoming edgesgraph = {} # adjacency list graphprint("Initializing graph and inDegree")for word in words:for character in word:inDegree[character] = 0graph[character] = []print("\tinDegree: ", inDegree, sep = "")print("\tgraph: ", graph, sep = "")print("")print("Building the graph")for i in range(0, len(words)-1):# find ordering of characters from adjacent wordsw1, w2 = words[i], words[i + 1]for j in range(0, min(len(w1), len(w2))):parent, child = w1[j], w2[j]if parent != child: # if the two characters are different# put the child into it's parent's listgraph[parent].append(child)inDegree[child] += 1 # increment child's inDegreebreak # only the first different character between the two words will help us find the orderprint("\tinDegree: ", inDegree, sep = "")print("\tgraph: ", graph, sep = "")print("")print("Finding the sources")sources = deque()l = 1for key in inDegree:print("\tLoop iteration: ", l, sep = "")l+=1if inDegree[key] == 0:print("\t\tinDegree of vertex ", key, " = ", inDegree[key], " --> vertex is a source", sep = "")sources.append(key)print("\t\tsources: ", sources, sep = "")else:print("\t\tinDegree of vertex ", key, " = ", inDegree[key], " --> not 0, hence, not a source", sep = "")print("")print("")sortedOrder = []return ''.join(sortedOrder)def main():inputWords = [["ba", "bc", "ac", "cab"], ["cab", "aaa", "aab"], ["ywx", "wz", "xww", "xz", "zyy", "zwz"]]for i in inputWords:print("Character order: " + find_order(i))print("-"*100+"\n")main()
Lastly, we’ll topologically sort the vertices (that represent the characters in the alien language). We’ll use the same layer-by-layer approach explained in the Introduction guide: adding the sources into an array, sortedOrder
, and decrementing their children’s in-degrees. If a child’s in-degree becomes zero, this signifies that it belongs to the next layer and we add it to the sources
queue.
from collections import dequeimport copydef find_order(words):if len(words) == 0:return ""# a. Initialize the graphinDegree = {} # count of incoming edgesgraph = {} # adjacency list graphprint("Initializing graph and inDegree")for word in words:for character in word:inDegree[character] = 0graph[character] = []print("\tinDegree: ", inDegree, sep = "")print("\tgraph: ", graph, sep = "")print("")print("Building the graph")for i in range(0, len(words)-1):# find ordering of characters from adjacent wordsw1, w2 = words[i], words[i + 1]for j in range(0, min(len(w1), len(w2))):parent, child = w1[j], w2[j]if parent != child: # if the two characters are different# put the child into it's parent's listgraph[parent].append(child)inDegree[child] += 1 # increment child's inDegreebreak # only the first different character between the two words will help us find the orderprint("\tinDegree: ", inDegree, sep = "")print("\tgraph: ", graph, sep = "")print("")print("Finding the sources")sources = deque()for key in inDegree:if inDegree[key] == 0:sources.append(key)print("\tsources: ", sources, sep = "")print("")sortedOrder = []print("Getting the topological ordering")m = 1while sources:print("\tOuter loop iteration: ", m)m+=1tempsource = deque()tempsource = copy.copy(sources)vertex = sources.popleft()print("\t\tPopping from sources: ", tempsource, " --> ", sources, sep = "")print("\t\tvertex: ", vertex, sep = "")temporder = []temporder = copy.copy(sortedOrder)sortedOrder.append(vertex)print("\t\tAppending to sortedOrder: ", temporder, " --> ", sortedOrder, sep = "")n = 1for child in graph[vertex]: # get the node's children to decrement their in-degreesprint("\t\t\tInner loop iteration: ", n)n+=1print("\t\t\t\tchild: ", child, sep="")print("\t\t\t\tDecrementing the indegree of vertex ", child, ": ", inDegree[child], " --> ", inDegree[child] - 1, sep = "")inDegree[child] -= 1if inDegree[child] == 0:print("\t\t\t\tindegree of ", child, " = 0", sep = "")temps = []temps = copy.copy(sources)sources.append(child)print("\t\t\t\tAppending to sources: ", temps, " --> ", sources, sep = "")print("")print("")# topological sort is not possible as the graph has a cycleif len(sortedOrder) != len(inDegree):print("Topological sort is not possible as the graph has a cycle.")return ""return ''.join(sortedOrder)def main():inputWords = [["ba", "bc", "ac", "cab"], ["cab", "aaa", "aab"], ["ywx", "wz", "xww", "xz", "zyy", "zwz"]]for i in inputWords:print("Character order: " + find_order(i))print("-"*100+"\n")main()