element that holds the result will hold a number, and we will want to increment a counter by 1. Otherwise, it will hold a \\xa0 character, and we will not increment the counter."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def is_success(s):\n",
"    \"\"\"Return 1 if a result cell's text records a success, otherwise 0.\n",
"\n",
"    A result that was not achieved is shown as a single non-breaking space\n",
"    (U+00A0). Missing text (None) and empty or whitespace-only text are\n",
"    treated the same way, so they are never counted as successes.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    s : str or None\n",
"        Text of the element that holds the result.\n",
"\n",
"    Returns\n",
"    -------\n",
"    int\n",
"        1 when the text is non-blank, 0 otherwise, so the value can be\n",
"        added directly to a counter.\n",
"    \"\"\"\n",
"    # str.strip() also removes U+00A0, so the placeholder collapses to ''.\n",
"    if s is None or s.strip() == '':\n",
"        return 0\n",
"    return 1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, we can loop through each table in the `score_tbodys` list, counting the number of tops and zones for each boulder in each round. This step involved a lot of reverse-engineering HTML, and for brevity I will not explain it in detail.\n",
"\n",
"We initialize an empty list and, after processing each round, append one list per boulder holding that boulder's results."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1243, 'FJR', 'D3', 'Divisional', 'final', 1, 13, 3, 12, 0]\n",
"[1243, 'FJR', 'D3', 'Divisional', 'final', 2, 13, 4, 10, 0]\n",
"[1243, 'FJR', 'D3', 'Divisional', 'final', 3, 13, 1, 12, 0]\n",
"[1243, 'FJR', 'D3', 'Divisional', 'final', 4, 13, 1, 5, 0]\n",
"[1243, 'FYA', 'D3', 'Divisional', 'final', 1, 13, 3, 13, 0]\n"
]
}
],
"source": [
"# Layout of the results page and facts about this comp that we already know.\n",
"# In the original project this information was parsed from the page as well.\n",
"EVENT_ID = 1243\n",
"LOCATION = 'D3'\n",
"LEVEL = 'Divisional'\n",
"N_RESULT_TABLES = 16    # qualifiers and finals for all 8 categories\n",
"N_FINAL_TABLES = 8      # table bodies after the first 8 hold qualifier rounds\n",
"BOULDERS_PER_ROUND = 4\n",
"FIRST_SCORE_CELL = 7    # score cells are taken from this <td> index onwards\n",
"\n",
"data_list = []\n",
"\n",
"for n in range(1, N_RESULT_TABLES + 1):\n",
"    rows = score_tbodys[n].find_all('tr')  # parsed once and reused below\n",
"\n",
"    # One [tops, zones, lowzones] tally per boulder in this round.\n",
"    boulders = [[0, 0, 0] for _ in range(BOULDERS_PER_ROUND)]\n",
"    climbers = 0\n",
"\n",
"    # this is where category is stored in the tree; the first 6 characters are dropped\n",
"    category = rows[0].find_all('td')[1].find_all('div')[1].string[6:]\n",
"\n",
"    for tr in rows:\n",
"        # each table row corresponds to one climber; rows carrying a class\n",
"        # attribute are no-show climbers and are skipped.\n",
"        if 'class' in tr.attrs:\n",
"            continue\n",
"\n",
"        climbers += 1\n",
"        score_data = tr.find_all('td')[FIRST_SCORE_CELL:]\n",
"        for i in range(BOULDERS_PER_ROUND):\n",
"            # loops once for each boulder\n",
"            divs = score_data[i].find_all('div')\n",
"            boulders[i][0] += is_success(divs[0].string)  # increment tops if the climber topped\n",
"            boulders[i][1] += is_success(divs[1].string)  # increment zone\n",
"            if len(divs) > 4:\n",
"                # only if the round uses low zones\n",
"                boulders[i][2] += is_success(divs[2].string)  # increment lowzone\n",
"\n",
"    # named `round_name` rather than `round` so the built-in round() is not shadowed\n",
"    round_name = 'final' if n <= N_FINAL_TABLES else 'quali'\n",
"\n",
"    # we append one list per boulder; each corresponds to one row of our eventual dataframe\n",
"    for i in range(BOULDERS_PER_ROUND):\n",
"        data_list.append([EVENT_ID, category, LOCATION, LEVEL, round_name, i + 1, climbers] + boulders[i])\n",
"\n",
"for row in data_list[0:5]:\n",
"    print(row)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Finally, using pandas, we can convert this nested list into a DataFrame."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" eid | \n",
" Category | \n",
" Location | \n",
" Level | \n",
" Round | \n",
" # | \n",
" Climbers | \n",
" Tops | \n",
" Zones | \n",
" Lows | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1243 | \n",
" FJR | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 1 | \n",
" 13 | \n",
" 3 | \n",
" 12 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1243 | \n",
" FJR | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 2 | \n",
" 13 | \n",
" 4 | \n",
" 10 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 1243 | \n",
" FJR | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 3 | \n",
" 13 | \n",
" 1 | \n",
" 12 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 1243 | \n",
" FJR | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 4 | \n",
" 13 | \n",
" 1 | \n",
" 5 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 1243 | \n",
" FYA | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 1 | \n",
" 13 | \n",
" 3 | \n",
" 13 | \n",
" 0 | \n",
"
\n",
" \n",
" 5 | \n",
" 1243 | \n",
" FYA | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 2 | \n",
" 13 | \n",
" 10 | \n",
" 10 | \n",
" 0 | \n",
"
\n",
" \n",
" 6 | \n",
" 1243 | \n",
" FYA | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 3 | \n",
" 13 | \n",
" 2 | \n",
" 11 | \n",
" 0 | \n",
"
\n",
" \n",
" 7 | \n",
" 1243 | \n",
" FYA | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 4 | \n",
" 13 | \n",
" 1 | \n",
" 11 | \n",
" 0 | \n",
"
\n",
" \n",
" 8 | \n",
" 1243 | \n",
" FYB | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 1 | \n",
" 13 | \n",
" 6 | \n",
" 9 | \n",
" 0 | \n",
"
\n",
" \n",
" 9 | \n",
" 1243 | \n",
" FYB | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 2 | \n",
" 13 | \n",
" 1 | \n",
" 13 | \n",
" 0 | \n",
"
\n",
" \n",
" 10 | \n",
" 1243 | \n",
" FYB | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 3 | \n",
" 13 | \n",
" 4 | \n",
" 8 | \n",
" 0 | \n",
"
\n",
" \n",
" 11 | \n",
" 1243 | \n",
" FYB | \n",
" D3 | \n",
" Divisional | \n",
" final | \n",
" 4 | \n",
" 13 | \n",
" 1 | \n",
" 5 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" eid Category Location Level Round # Climbers Tops Zones Lows\n",
"0 1243 FJR D3 Divisional final 1 13 3 12 0\n",
"1 1243 FJR D3 Divisional final 2 13 4 10 0\n",
"2 1243 FJR D3 Divisional final 3 13 1 12 0\n",
"3 1243 FJR D3 Divisional final 4 13 1 5 0\n",
"4 1243 FYA D3 Divisional final 1 13 3 13 0\n",
"5 1243 FYA D3 Divisional final 2 13 10 10 0\n",
"6 1243 FYA D3 Divisional final 3 13 2 11 0\n",
"7 1243 FYA D3 Divisional final 4 13 1 11 0\n",
"8 1243 FYB D3 Divisional final 1 13 6 9 0\n",
"9 1243 FYB D3 Divisional final 2 13 1 13 0\n",
"10 1243 FYB D3 Divisional final 3 13 4 8 0\n",
"11 1243 FYB D3 Divisional final 4 13 1 5 0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from pandas import DataFrame\n",
"\n",
"# Column labels, listed in the same order as the fields of each row in data_list.\n",
"column_titles = [\n",
"    'eid', 'Category', 'Location', 'Level', 'Round',\n",
"    '#', 'Climbers', 'Tops', 'Zones', 'Lows',\n",
"]\n",
"New_df = DataFrame(data=data_list, columns=column_titles)\n",
"New_df.head(n=12)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "MATH9",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}