%%sql
postgresql:///schools

-- Select all rows from the database
-- Display only ten results
SELECT *
FROM schools
LIMIT 10;

10 rows affected.


%%nose
last_output = _

def test_task1_output_type():
    assert str(type(last_output)) == "<class 'sql.run.ResultSet'>", \
    "Please ensure an SQL ResultSet is the output of the code cell." 

results = last_output.DataFrame()

def test_task1_results():
    assert results.shape == (10, 7), \
    "The results should have fourteen columns and ten rows."
    assert set(results.columns) == set(['school_name', 'borough', 'building_code', 'average_math', 'average_reading', 'average_writing', 'percent_tested']), \
    'The results should include all columns from the database, without using an alias.'
    assert last_output.DataFrame().loc[0, 'building_code'] == "M022", \
    "The building code for the first school should be M022."

2/2 tests passed


%%sql

-- Count rows with percent_tested missing and total number of schools
SELECT COUNT(*) - COUNT(percent_tested) as num_tested_missing, COUNT(*) as num_schools
FROM schools;

 * postgresql:///schools
1 rows affected.


%%nose
last_output = _
last_output_df = last_output.DataFrame()

def test_task2_columns():
    assert last_output_df.shape == (1, 2), \
    "Did you correctly select the data? Expected the result to contain one row and two columns?"
    assert set(last_output_df.columns) == set(["num_tested_missing", "num_schools"]), \
    "Did you use the alias `num_tested_missing` and also select the `num_schools` column?"

def test_task2_output():
    assert last_output_df.iloc[0, 0] == 20, \
    """Did you correctly calculate `"num_tested_missing"?"""
    assert last_output_df.iloc[0, 1] == 375, \
    """Did you correctly calculate the total number of rows in the database?"""

2/2 tests passed


%%sql

-- Count the number of unique building_code values
SELECT COUNT(DISTINCT building_code) as num_school_buildings
FROM schools;

 * postgresql:///schools
1 rows affected.


%%nose
last_output = _
last_output_df = last_output.DataFrame()

def test_task3_column_name():
    assert last_output_df.columns.tolist() == ["num_school_buildings"], \
    "Did you use the correct alias for the number of unique school buildings?"

def test_task3_value():
    assert last_output_df.values.tolist() == [[233]], \
    "Did you use the correct method to calculate how many unique school buildings are in the database? Expected a different value."

0/2 tests passed; 2 failed
========
__main__.test_task3_column_name
========
Traceback (most recent call last):
  File "/usr/lib/python3.6/unittest/case.py", line 59, in testPartExecutor
    yield
  File "/usr/lib/python3.6/unittest/case.py", line 605, in run
    testMethod()
  File "/usr/local/lib/python3.6/dist-packages/nose/case.py", line 198, in runTest
    self.test(*self.arg)
  File "<string>", line 6, in test_task3_column_name
AssertionError: Did you use the correct alias for the number of unique school buildings?

========
__main__.test_task3_value
========
Traceback (most recent call last):
  File "/usr/lib/python3.6/unittest/case.py", line 59, in testPartExecutor
    yield
  File "/usr/lib/python3.6/unittest/case.py", line 605, in run
    testMethod()
  File "/usr/local/lib/python3.6/dist-packages/nose/case.py", line 198, in runTest
    self.test(*self.arg)
  File "<string>", line 10, in test_task3_value
AssertionError: Did you use the correct method to calculate how many unique school buildings are in the database? Expected a different value.


%%sql

-- Select school and average_math
-- Filter for average_math 640 or higher
-- Display from largest to smallest average_math
SELECT school_name, average_math
FROM schools
WHERE average_math >= 640
ORDER BY average_math DESC;

 * postgresql:///schools
10 rows affected.


%%nose
last_output = _
last_output_df = last_output.DataFrame()

def test_task4_columns():
    assert set(last_output_df.columns) == set(["school_name", "average_math"]), \
    "Did you select the correct columns?"

def test_task4_filter():
    assert last_output_df["average_math"].min() >= 640, \
    """Did you correctly filter for "average_math" scores more than or equal to 640?"""
    assert last_output_df.shape == (10, 2), \
    """The output has the wrong number of results, did you correctly filter the "average_math" column?"""

def test_task4_values():
    assert last_output_df.iloc[0,0] == "Stuyvesant High School", \
    """Did you run the correct query? Expected the first school to be "Stuyvesant High School"."""
    assert last_output_df.iloc[0,1] == 754.0, \
    """Did you correctly sort the values by "average_math" in descending order? Expected a different range of results."""

3/3 tests passed


%%sql

-- Find lowest average_reading
SELECT MIN(average_reading) as lowest_reading
FROM schools;

 * postgresql:///schools
1 rows affected.


%%nose
last_output = _
last_output_df = last_output.DataFrame()

def test_task5_value():  
    assert last_output_df["lowest_reading"].values.tolist() == [302.0], \
    """Did you select the minimum value for the "average_reading" column?"""

def test_task5_alias():
    assert last_output_df.columns.tolist() == ["lowest_reading"], \
    """Did you use the correct alias? Expected "lowest_reading"."""

2/2 tests passed


%%sql

-- Find the top score for average_writing
-- Group the results by school
-- Sort by max_writing in descending order
-- Reduce output to one school
SELECT school_name, MAX(average_writing) as max_writing
FROM schools
GROUP BY school_name
ORDER BY max_writing DESC
LIMIT 1;

 * postgresql:///schools
1 rows affected.


%%nose
last_output = _
last_output_df = last_output.DataFrame()

def test_task6_columns():
    assert set(last_output_df.columns) == set(["school_name", "max_writing"]), \
    """Did you select "average_writing" and use an alias?"""
    
def test_task6_shape():
    assert last_output_df.shape[0] == 1, \
    "Did you select the correct number of values? Expected one row."

def test_task6_values():
    assert last_output_df.values.tolist() == [['Stuyvesant High School', 693.0]], \
    """Did you select the maximum value for "average_writing"? Expected a different value."""

3/3 tests passed


%%sql

-- Calculate average_sat
-- Group by school_name
-- Sort by average_sat in descending order
-- Display the top ten results
SELECT school_name, SUM(average_math + average_reading + average_writing) as average_sat
FROM schools
GROUP BY school_name
ORDER BY average_sat DESC
LIMIT 10;

 * postgresql:///schools
10 rows affected.


%%nose
last_output = _
last_output_df = last_output.DataFrame()

def test_task7_columns():
    assert set(last_output_df.columns) == set(["school_name", "average_sat"]), \
    """Did you select the correct columns and use an alias for the sum of the three sat score columns?"""
    
def test_task7_shape():
    assert last_output_df.shape[0] == 10, \
    "Did you limit the number of results to ten?"
    assert last_output_df.shape[1] == 2, \
    """Expected your query to return two columns: "school_name" and "average_sat"."""

def test_task7_values():
    assert last_output_df.iloc[0].values.tolist() == ['Stuyvesant High School', 2144], \
    """Did you correctly define your query? Expected different values for the first school."""
    assert last_output_df["average_sat"].min() == 1889, \
    """Did you correctly filter the results? Expected a different lowest score for "average_sat"."""  
    assert last_output_df["average_sat"].max() == 2144, \
    """Did you correctly calculate the "average_sat" column? Expected a different top score."""

3/3 tests passed


%%sql

-- Select borough and a count of all schools, aliased as num_schools
-- Calculate the sum of average_math, average_reading, and average_writing, divided by a count of all schools, aliased as average_borough_sat
-- Organize results by borough
-- Display by average_borough_sat in descending order
SELECT borough, COUNT(*) as num_schools, SUM(average_math + average_reading + average_writing) / COUNT(*) as average_borough_sat
FROM schools
GROUP BY borough
ORDER BY average_borough_sat DESC;

 * postgresql:///schools
5 rows affected.


%%nose
last_output = _
last_output_df = last_output.DataFrame()

def test_task8_columns():
    assert set(last_output_df.columns) == set(['borough', 'num_schools', 'average_borough_sat']), \
    """Did you select the correct columns and use aliases for the number of schools and average sat scores?"""

def test_task8_shape():
    assert last_output_df.shape[0] == 5, \
    "Did you group by the correct column? Expected five rows to be returned: one for each borough."
    assert last_output_df.shape[1] == 3, \
    """Expected your query to return three columns: "borough", "num_schools", and "average_borough_sat"."""

def test_task8_values():
    # Each assert statement checks values per row 
    assert last_output_df.iloc[0].values.tolist() == ['Staten Island', 10, 1439], \
    """Did you correctly define your query? Expected different values for Staten Island."""
    assert last_output_df.iloc[1].values.tolist() == ['Queens', 69, 1345], \
    """Did you correctly define your query? Expected different values for Queens."""
    assert last_output_df.iloc[2].values.tolist() == ['Manhattan', 89, 1340], \
    """Did you correctly define your query? Expected different values for Manhattan."""
    assert last_output_df.iloc[3].values.tolist() == ['Brooklyn', 109, 1230], \
    """Did you correctly define your query? Expected different values for Brooklyn."""
    assert last_output_df.iloc[4].values.tolist() == ['Bronx', 98, 1202], \
    """Did you correctly define your query? Expected different values for the Bronx."""
    # Check lowest average_reading score is in the last row
    assert last_output_df.iloc[-1, 0] == 'Bronx', \
    """Did you sort the results by "average_sat" in descending order?"""

3/3 tests passed


%%sql

-- Select school and average_math
-- Filter for schools in Brooklyn
-- Aggregate on school_name
-- Display results from highest average_math and restrict output to five rows
SELECT school_name, average_math
FROM schools
WHERE borough = 'Brooklyn'
GROUP BY school_name
ORDER BY average_math DESC
LIMIT 5;

 * postgresql:///schools
5 rows affected.


%%nose
last_output = _
last_output_df = last_output.DataFrame()

def test_task9_columns():
    assert last_output_df.columns.tolist() == ['school_name', 'average_math'], \
    """Did you select the correct columns? Expected "school_name" and "average_math"."""
    
def test_task9_shape():
    assert last_output_df.shape[0] == 5, \
    "Did you limit the output to 5 rows?"
    assert last_output_df.shape[1] == 2, \
    "Did you select the correct number of columns? Expected two."
    
def test_task9_school_names():
    assert last_output_df["school_name"].tolist() == ['Brooklyn Technical High School', 'Brooklyn Latin School', 'Leon M. Goldstein High School for the Sciences', 'Millennium Brooklyn High School', 'Midwood High School'], \
    "Did you correctly filter by borough? Expected a different list of school names."
    
def test_task9_values():
    assert last_output_df["average_math"].max() == 682, \
    """Did you select the correct values? Expected a maximum value of 682.0 for "average_math"."""    
    assert last_output_df["average_math"].min() == 550, \
    """Did you select the correct values? Expected a minimum value of 550.0 for "average_math"."""
    assert last_output_df["average_math"].values.tolist() == [682, 625, 563, 553, 550], \
    """Did you sort by "average_math" in descending order? Expected different values."""

4/4 tests passed

column	type	description
`school_name`	`varchar`	Name of school
`borough`	`varchar`	Borough that the school is located in
`building_code`	`varchar`	Code for the building
`average_math`	`int`	Average math score for SATs
`average_reading`	`int`	Average reading score for SATs
`average_writing`	`int`	Average writing score for SATs
`percent_tested`	`numeric`	Percentage of students completing SATs

school_name	borough	building_code	average_math	average_reading	average_writing	percent_tested
New Explorations into Science, Technology and Math High School	Manhattan	M022	657	601	601	None
Essex Street Academy	Manhattan	M445	395	411	387	78.9
Lower Manhattan Arts Academy	Manhattan	M445	418	428	415	65.1
High School for Dual Language and Asian Studies	Manhattan	M445	613	453	463	95.9
Henry Street School for International Studies	Manhattan	M056	410	406	381	59.7
Bard High School Early College	Manhattan	M097	634	641	639	70.8
Urban Assembly Academy of Government and Law	Manhattan	M445	389	395	381	80.8
Marta Valle High School	Manhattan	M025	438	413	394	35.6
University Neighborhood High School	Manhattan	M446	437	355	352	69.9
New Design High School	Manhattan	M445	381	396	372	73.7

school_name	average_math
Stuyvesant High School	754
Bronx High School of Science	714
Staten Island Technical High School	711
Queens High School for the Sciences at York College	701
High School for Mathematics, Science, and Engineering at City College	683
Brooklyn Technical High School	682
Townsend Harris High School	680
High School of American Studies at Lehman College	669
New Explorations into Science, Technology and Math High School	657
Eleanor Roosevelt High School	641

school_name	average_sat
Stuyvesant High School	2144
Staten Island Technical High School	2041
Bronx High School of Science	2041
High School of American Studies at Lehman College	2013
Townsend Harris High School	1981
Queens High School for the Sciences at York College	1947
Bard High School Early College	1914
Brooklyn Technical High School	1896
Eleanor Roosevelt High School	1889
High School for Mathematics, Science, and Engineering at City College	1889

1. Inspecting the data¶

`schools`

2. Finding missing values¶

3. Schools by building code¶

4. Best schools for math¶

5. Lowest reading score¶

6. Best writing school¶

7. Top 10 schools¶

8. Ranking boroughs¶

9. Brooklyn numbers¶

borough	num_schools	average_borough_sat
Staten Island	10	1439
Queens	69	1345
Manhattan	89	1340
Brooklyn	109	1230
Bronx	98	1202

school_name	average_math
Brooklyn Technical High School	682
Brooklyn Latin School	625
Leon M. Goldstein High School for the Sciences	563
Millennium Brooklyn High School	553
Midwood High School	550