from collections import Counter
import randomdef find_ngrams(seq, n):Given a list seq returns a list of ngrams where each element is
an n-tuple.
Example:
>>> seq = [1, 2, 1, 3, 1, 2, 7]
>>> find_ngrams(seq, 2)
[(1, 2), (2, 1), (1, 3), (3, 1), (1, 2), (2, 7)]
>>> find_ngrams(seq, 3)
[(1, 2, 1), (2, 1, 3), (1, 3, 1), (3, 1, 2), (1, 2, 7)]
ngrams = []Iterate over seq in n-sized slices each converted to
an n-tuple and appended to ngrams.
for i in range(len(seq) - n + 1):
new_tuple = tuple(seq[i:i+n])
ngrams.append(new_tuple)
return ngramsGiven a list seq returns a Markov model of the given order
as a dictionary. The keys are order-sized tuples. The value for each
key is a list of elements in seq that immediately follow
occurances of the key in the seq.
Example:
>>> seq = [1, 2, 1, 3, 1, 2, 7]
>>> markov_model(seq, 1)
{(2,): [1, 7], (3,): [1], (1,): [2, 3, 2]}
>>> markov_model(seq, 2)
{(1, 2): [1, 7], (1, 3): [1], (3, 1): [2], (2, 1): [3]}
def markov_model(seq, order):First use find_ngrams to slice seq appropriately.
ngrams = find_ngrams(seq, order+1)
markov = {}For each ngram in ngrams (where n = order + 1) the first
(n - 1) elements are used as the key and the last
element is appended to the list of possible
continuations.
for ngram in ngrams:
try:
markov[ngram[:-1]].append(ngram[-1])
except KeyError:
markov[ngram[:-1]] = [ngram[-1]]
return markovGiven a list seq returns a Markov chain of
the given order with a maximum length of the integer max_length.
In this case, the function assumes that seq is
a list of words, tries to begin the chain with a capitalized word and
end the chain with a terminal punctions ('.', '!', '?').
def markov_chain(seq, order, max_length=50): markov = markov_model(seq, order)Start chain with an ngram (n = order) that begins with
an uppercase letter, if possible.
If there are no ngrams that begin with an uppercase letter, start with
any random ngram.
startkeys = [key for key in markov if key[0][0].isupper()]
if len(startkeys) == 0:
startkeys = list(markov.keys())
key = random.choice(startkeys)Markov chain is a list beginning with the start key.
chain = list(key)while loop to generate markov chain.
while True:Choose a random continuation from the elements of
the value for the current key.
new = random.choice(markov[key])
chain.append(new)break if the new element is the end of a sentence
if new[-1] in ['.', '!', '?']:
breakbreak if the length of chain is max_length or greater.
if len(chain) >= max_length:
breakConcatenate new element to key and use the appropriate slice
of the resulting tuple as the new key.
key = (key + (new,))
key = key[1:]
return " ".join(chain)Assign a multiline string
sonnet = '''From fairest creatures we desire increase,
That thereby beauty's rose might never die,
But as the riper should by time decease,
His tender heir might bear his memory:
But thou contracted to thine own bright eyes,
Feed'st thy light's flame with self-substantial fuel,
Making a famine where abundance lies,
Thy self thy foe, to thy sweet self too cruel:
Thou that art now the world's fresh ornament,
And only herald to the gaudy spring,
Within thine own bud buriest thy content,
And tender churl mak'st waste in niggarding:
Pity the world, or else this glutton be,
To eat the world's due, by the grave and thee.'''Open the file 'sonnets.txt' for reading and read the contents.
(You will need to download sonnets.txt to the same directory as this code.)
sonnets = open('sonnets.txt', 'r').read()The split() method splits a string into a list of substrings separated
by white space.
Example:
>>> my_string = 'I am Sam'
>>> my_string.split()
['I', 'am', 'Sam']
Try substituting sonnets for sonnet.
text = sonnet.split()Check output of find_ngrams
ngrams = find_ngrams(text, 2)
print(ngrams)Check output of markov_model
markov = markov_model(text, 1)
print(markov)Print out the fist 10 keys of the markov model dictionary
for key in list(markov)[:10]:
print(key, markov[key])Print the n-order markov_chain generated from text. Try using
different values for n.
n = 1
print(markov_chain(text, n))