# Python 2.7.7 Code
# Jonathan Frech 15th of January, 2016
# import needed modules
import urllib, os, re, threading
# row order of the final table; the trailing index picks one of the two supported names
DATASORTTYPE = ("highest_count", "alphabetically")[1]
# addresses of the posts that get loaded and counted, in the order they are fetched
links = [
    "https://jonathanfrech.wordpress.com/2015/03/28/1/",
    "https://jonathanfrech.wordpress.com/2015/03/29/%cf%80-generator/",
    "https://jonathanfrech.wordpress.com/2015/03/29/rectangles/",
    "https://jonathanfrech.wordpress.com/2015/03/30/circle-walk/",
    "https://jonathanfrech.wordpress.com/2015/03/30/moving/",
    "https://jonathanfrech.wordpress.com/2015/03/31/polygons/",
    "https://jonathanfrech.wordpress.com/2015/04/01/clean-up-your-mac/",
    "https://jonathanfrech.wordpress.com/2015/04/02/circle-walk-ii/",
    "https://jonathanfrech.wordpress.com/2015/04/03/colors-v/",
    "https://jonathanfrech.wordpress.com/2015/04/04/colors-v-other-results/",
    "https://jonathanfrech.wordpress.com/2015/04/05/happy-eastern/",
    "https://jonathanfrech.wordpress.com/2015/04/06/colors-v-other-results-2/",
    "https://jonathanfrech.wordpress.com/2015/04/07/colors-v-reupload/",
    "https://jonathanfrech.wordpress.com/2015/04/08/bobbles/",
    "https://jonathanfrech.wordpress.com/2015/04/09/%cf%86-generator/",
    "https://jonathanfrech.wordpress.com/2015/04/10/hangman/",
    "https://jonathanfrech.wordpress.com/2015/04/11/rand-pix/",
    "https://jonathanfrech.wordpress.com/2015/04/12/fs-letters/",
    "https://jonathanfrech.wordpress.com/2015/04/18/plant-mimic/",
    "https://jonathanfrech.wordpress.com/2015/04/19/circle-crawler/",
    "https://jonathanfrech.wordpress.com/2015/04/25/bouncing/",
    "https://jonathanfrech.wordpress.com/2015/04/26/worldwide-pinhole-day/",
    "https://jonathanfrech.wordpress.com/2015/05/01/star/",
    "https://jonathanfrech.wordpress.com/2015/05/02/rain/",
    "https://jonathanfrech.wordpress.com/2015/05/03/circle-splatter/",
    "https://jonathanfrech.wordpress.com/2015/05/09/the-firefox-unicorn/",
    "https://jonathanfrech.wordpress.com/2015/05/10/shaper/",
    "https://jonathanfrech.wordpress.com/2015/05/16/shadow-v-1-1/",
    "https://jonathanfrech.wordpress.com/2015/05/17/graph-sim/",
    "https://jonathanfrech.wordpress.com/2015/05/23/colors-vi/",
    "https://jonathanfrech.wordpress.com/2015/05/24/jclock-ii/",
    "https://jonathanfrech.wordpress.com/2015/05/25/pentecost/",
    "https://jonathanfrech.wordpress.com/2015/05/30/prime-spiral/",
    "https://jonathanfrech.wordpress.com/2015/05/31/boxes/",
    "https://jonathanfrech.wordpress.com/2015/06/06/grow/",
    "https://jonathanfrech.wordpress.com/2015/06/07/jetris-v-1-2/",
    "https://jonathanfrech.wordpress.com/2015/06/13/jong/",
    "https://jonathanfrech.wordpress.com/2015/06/14/dig/",
    "https://jonathanfrech.wordpress.com/2015/06/20/jake/",
    "https://jonathanfrech.wordpress.com/2015/06/21/fs-letters-ii/",
    "https://jonathanfrech.wordpress.com/2015/06/27/bubbletree/",
    "https://jonathanfrech.wordpress.com/2015/06/28/primes/",
    "https://jonathanfrech.wordpress.com/2015/07/04/circle-mover/",
    "https://jonathanfrech.wordpress.com/2015/07/05/gradient-drawer/",
    "https://jonathanfrech.wordpress.com/2015/07/11/jclock-iii/",
    "https://jonathanfrech.wordpress.com/2015/07/12/jic-jac-joe/",
    "https://jonathanfrech.wordpress.com/2015/07/18/sierpinski-triangle/",
    "https://jonathanfrech.wordpress.com/2015/07/19/prime-spiral-ii/",
    "https://jonathanfrech.wordpress.com/2015/07/25/menger-sponge/",
    "https://jonathanfrech.wordpress.com/2015/07/26/bobble-throw/",
    "https://jonathanfrech.wordpress.com/2015/08/01/jeakout/",
    "https://jonathanfrech.wordpress.com/2015/08/02/pattern/",
    "https://jonathanfrech.wordpress.com/2015/08/08/jclock-iv/",
    "https://jonathanfrech.wordpress.com/2015/08/09/caesar-cipher/",
    "https://jonathanfrech.wordpress.com/2015/08/15/jonnect-jour/",
    "https://jonathanfrech.wordpress.com/2015/08/16/jasteroids/",
    "https://jonathanfrech.wordpress.com/2015/08/22/random-resource-locator/",
    "https://jonathanfrech.wordpress.com/2015/08/23/stacking-stones/",
    "https://jonathanfrech.wordpress.com/2015/08/29/trippy/",
    "https://jonathanfrech.wordpress.com/2015/08/30/r-lines/",
    "https://jonathanfrech.wordpress.com/2015/09/05/j-filters/",
    "https://jonathanfrech.wordpress.com/2015/09/06/space-adventures/",
    "https://jonathanfrech.wordpress.com/2015/09/12/primes-ii/",
    "https://jonathanfrech.wordpress.com/2015/09/13/j-filters-ii/",
    "https://jonathanfrech.wordpress.com/2015/09/19/text-scrambler/",
    "https://jonathanfrech.wordpress.com/2015/09/20/jclock-v/",
    "https://jonathanfrech.wordpress.com/2015/09/26/pastel/",
    "https://jonathanfrech.wordpress.com/2015/09/27/pygame-gfxdraw-aacircle-bug/",
    "https://jonathanfrech.wordpress.com/2015/10/03/j-filters-iii-2/",
    "https://jonathanfrech.wordpress.com/2015/10/04/sand/",
    "https://jonathanfrech.wordpress.com/2015/10/10/jdrawer/",
    "https://jonathanfrech.wordpress.com/2015/10/11/gradient-triangles/",
    "https://jonathanfrech.wordpress.com/2015/10/17/flare/",
    "https://jonathanfrech.wordpress.com/2015/10/18/ct-racetrack/",
    "https://jonathanfrech.wordpress.com/2015/10/24/e-generator/",
    "https://jonathanfrech.wordpress.com/2015/10/25/spiral/",
    "https://jonathanfrech.wordpress.com/2015/10/31/halloween/",
    "https://jonathanfrech.wordpress.com/2015/11/01/langtons-ant/",
    "https://jonathanfrech.wordpress.com/2015/11/07/prime-remainders/",
    "https://jonathanfrech.wordpress.com/2015/11/08/jclock-vi/",
    "https://jonathanfrech.wordpress.com/2015/11/14/jclock-vii/",
    "https://jonathanfrech.wordpress.com/2015/11/15/sailing/",
    "https://jonathanfrech.wordpress.com/2015/11/21/text-spinner/",
    "https://jonathanfrech.wordpress.com/2015/11/22/bubbles/",
    "https://jonathanfrech.wordpress.com/2015/11/28/sleeper/",
    "https://jonathanfrech.wordpress.com/2015/11/29/first-sunday-in-advent/",
    "https://jonathanfrech.wordpress.com/2015/12/05/ct-racetrack-ii/",
    "https://jonathanfrech.wordpress.com/2015/12/06/second-sunday-in-advent/",
    "https://jonathanfrech.wordpress.com/2015/12/12/mandelbrot-set/",
    "https://jonathanfrech.wordpress.com/2015/12/13/third-sunday-in-advent/",
    "https://jonathanfrech.wordpress.com/2015/12/19/%cf%80-approximation/",
    "https://jonathanfrech.wordpress.com/2015/12/20/fourth-sunday-in-advent/",
    "https://jonathanfrech.wordpress.com/2015/12/24/merry-christmas/",
    "https://jonathanfrech.wordpress.com/2015/12/26/spinning-shapes/",
    "https://jonathanfrech.wordpress.com/2015/12/27/prime-circle/",
    "https://jonathanfrech.wordpress.com/2015/12/31/happy-new-year/",
    "https://jonathanfrech.wordpress.com/2016/01/02/white-fireworks/",
    "https://jonathanfrech.wordpress.com/2016/01/03/rotating-squares/",
    "https://jonathanfrech.wordpress.com/2016/01/09/j-filters-iv/",
]
# load the link and get html string
def load(link):
    """Open link with urllib.urlopen and return everything read from the response.

    The response object is closed before returning, also when read() raises,
    so no connection is left open per fetched post.
    """
    response = urllib.urlopen(link)
    try:
        return response.read()
    finally:
        response.close()
# strip off Wordpress-code, leaving my post in raw text
def strip(page):
# Removes parts of the loaded page with a series of re.sub calls and returns the rest.
# NOTE(review): the pattern literals of the second and third re.sub call are
# broken across lines (the third even opens with ' and closes with "), so this
# function does not parse as written -- presumably HTML tags inside those
# literals were lost when this listing was extracted. TODO restore them from the
# original script.
# first drop every newline, so the later ".*" patterns can span the whole page
page = re.sub("\n", "", page)
page = re.sub('.*
\t\t
', '', page)
page = re.sub('
.*", "", page)
# NOTE(review): the next comment is cut in two as well; the name it mentions is missing
# not every post has a
-line...
# drop "(function(g)", any tabs directly in front of it, and everything after it
page = re.sub("\t*\(function\(g\).*", "", page)
return page
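# strips off the html <tag>...</tag> element
# (the tag name is missing from this listing -- presumably lost together with
# the markup around it; TODO confirm against the original script)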
def stripcode2(page):
# Removes whatever the pattern below matches from page and returns the result.
# NOTE(review): the pattern literal is split over three lines, which is not valid
# Python -- presumably an opening and a closing HTML tag around ".*" were lost
# when this listing was extracted. TODO restore them from the original script.
page = re.sub("
.*
", "", page)
return page
# strips off every html <...> string bit
def striphtml(page):
    """Return page with every "<...>" span removed.

    The match is non-greedy, so each span ends at the nearest ">"; "." does not
    match a newline, so a span has to open and close on the same line.
    """
    tag = re.compile("<.*?>")
    return tag.sub("", page)
# strips off &...;-characters
def stripspecial(page):
    """Return page with numeric character references ("&#", digits, ";") removed.

    Only the numeric form is matched; named references such as "&amp;" are left
    in place, exactly as before.
    """
    # raw string: the previous literal relied on the invalid string escapes
    # "\&", "\#", "\d" and "\;", which newer Python versions warn about
    return re.sub(r"&#\d*;", "", page)
# strips off characters, which are not ascii and the tab
def stripnonascii(page):
    """Return page without non-ASCII characters, carriage returns and tabs."""
    # removed one after another: everything outside \x00-\x7f, then \r, then \t
    for pattern in ("[^\x00-\x7f]", "\x0D", "\t"):
        page = re.sub(pattern, "", page)
    return page
# strips off \n characters
def striplinebreaks(page):
    """Return page with every newline character removed."""
    return re.sub("\n", "", page)
# strips off everything from a page (using above functions)
def stripall(page):
    """Run page through every strip function of this file, in order, and return the result.

    NOTE(review): stripcode is not defined in the visible part of this file --
    confirm that it exists, otherwise this raises NameError.
    """
    stages = (
        strip,
        stripcode,
        stripcode2,
        striphtml,
        stripspecial,
        stripnonascii,
        striplinebreaks,
    )
    for stage in stages:
        page = stage(page)
    return page
# makes sure given string has a specific length (adds spaces left or right)
def lengthen(s, length, spaceadd = "left"):
    """Return str(s) forced to exactly length characters.

    s        -- any value; it is converted with str()
    length   -- the wanted width
    spaceadd -- "left" pads with spaces in front, "right" pads behind;
                any other value adds no padding at all

    A text longer than length is cut down to its first length characters.
    """
    s = str(s)
    # str.rjust / str.ljust replace the former loops that added one space per pass
    if spaceadd == "left":
        s = s.rjust(length)
    elif spaceadd == "right":
        s = s.ljust(length)
    # a no-op when the text already fits, otherwise keeps the leading part
    return s[:length]
# makes a float fitting for the screen
def fit(n):
    """Return the number n as a 7 character field: 3 integer places, ".", 3 decimal places.

    The integer part is right-aligned and the decimal part left-aligned, both
    padded with spaces and cut to 3 characters (digits are cut off, not rounded).
    """
    # "%.12f" always gives plain fixed-point text. str(n) has no "." for ints and
    # switches to exponent notation for small floats such as 5e-05, which made the
    # former split(".")[1] raise IndexError.
    integer, _, decimal = ("%.12f" % float(n)).partition(".")
    # drop the zeros added by the fixed precision so 0.5 still shows as "0.5  "
    decimal = decimal.rstrip("0") or "0"
    return "%3s.%-3s" % (integer[:3], decimal[:3])
# generates a table entry with given character and count
def gettableentry(char, count):
    """Return one table row (with a leading newline) for char and its count.

    char  -- a single character, or "all" for the summary row
    count -- how often char occurred

    Appends [label, count] to the module-level list data and reads the
    module-level totalcharacters to compute the percentage.
    """
    if char == "all":
        label = char
    elif char == "\n":
        label = "\\n "
    else:
        label = "'" + char + "'"
    data.append([label, count])
    # no characters at all: report 0 percent instead of dividing by zero
    if totalcharacters:
        percent = count / totalcharacters * 100
    else:
        percent = 0.0
    # the label cell is 4 wide to line up with the "char" header (labels are 3 wide);
    # the count is padded but never cut, so counts of six or more digits stay correct
    return "\n| " + label.ljust(4) + " | " + str(count).rjust(5) + " | " + fit(percent) + " |"
# check every link entry
# n counts the posts loaded so far; it stays 0 when links is empty
n = 0
# stripped posts are collected in a list and joined once, instead of growing
# one string with += for every post
pages = []
for n, link in enumerate(links, 1):
    page = stripall(load(link))
    pages.append(page)
    # status report; integer arithmetic, because the float form could come out one
    # percent too low (29 / 100.0 * 100 is 28.999... and was cut to 28)
    print("Loaded " + str(100 * n // len(links)) + "% of the posts...")
string = "".join(pages)
# sort the characters; string becomes a sorted list of single characters
string = sorted(string)
# count how often every character occurs (the space entry exists even with count 0)
characters = {" ": 0}
for char in string:
    characters[char] = characters.get(char, 0) + 1
# total number of characters, stored as a float because gettableentry divides by it
totalcharacters = float(len(string))
# initialize the data list (character, count); gettableentry appends to it
data = []
# the table is collected piece by piece and joined once at the end
border = "+------+-------+---------+"
parts = [border, "\n| char | count | percent |", "\n" + border]
parts.append(gettableentry("all", int(totalcharacters)))
# decide the row order
if DATASORTTYPE == "highest_count":
    # highest count first; equal counts fall back to character order, so the
    # output no longer depends on the iteration order of the dictionary
    ordered = sorted(characters, key=lambda ch: (-characters[ch], ch))
elif DATASORTTYPE == "alphabetically":
    # sort the keys explicitly: iterating the dictionary itself gives no
    # guaranteed order under Python 2
    ordered = sorted(characters)
else:
    # unknown sort type: only the "all" row is shown, as before
    ordered = []
for char in ordered:
    parts.append(gettableentry(char, characters[char]))
# finish table
parts.append("\n" + border)
table = "".join(parts)
# print data sort type
print("DATASORTTYPE = " + DATASORTTYPE)
# print data
print("DATA = " + str(data))
# print calculated table
print(table)