import csv
import sys
from collections import defaultdict

def group_levels(rows):
    """Group the numeric level of every data row by its zipcode.

    ``rows`` is an iterable of already-parsed rows (lists of strings), such
    as the one produced by ``csv.reader``.  The first row is treated as the
    header and skipped, as is any empty row.  Every remaining row must have
    exactly three columns -- an ignored id, the zipcode and the level --
    otherwise the unpacking raises ``ValueError``; so does a level that
    ``float()`` cannot parse.

    Returns a ``defaultdict(list)`` mapping zipcode -> list of float levels.
    """
    levels_by_zip = defaultdict(list)
    for index, row in enumerate(rows):
        # index 0 is the header line; csv.reader yields [] for blank lines
        if index == 0 or not row:
            continue
        _, zipcode, level = row
        levels_by_zip[zipcode].append(float(level))
    return levels_by_zip


def average_levels(levels_by_zip):
    """Return ``(zipcode, average level)`` pairs sorted by zipcode.

    Sorting makes the output order deterministic instead of depending on
    the dictionary's iteration order.
    """
    return [(zipcode, sum(levels) / float(len(levels)))
            for zipcode, levels in sorted(levels_by_zip.items())]


def load_levels(path, delimiter=','):
    """Read the delimited file at ``path`` and group its levels by zipcode.

    NOTE(review): the sample data published next to this script looks
    tab-separated; if the real file is, pass a tab character as
    ``delimiter`` -- confirm against the actual file.
    """
    # The csv module expects a binary file on Python 2 but a text file
    # opened with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        handle = open(path, 'r', newline='')
    else:
        handle = open(path, 'rb')
    # try/finally (not ``with``) so the file is always closed while the
    # script stays valid on Python 2.5, which lacks a default ``with``.
    try:
        return group_levels(csv.reader(handle, delimiter=delimiter))
    finally:
        handle.close()


if __name__ == '__main__':
    data = load_levels('python_test.csv')
    for zipcode, average in average_levels(data):
        # '%s' renders the float through str(), as the print statement did
        print('%s %s' % (zipcode, average))


[python_test.csv]

ID	ZIPCODE	RATE
1	19003	27.5
2	19003	31.33
3	19083	41.4
4	19083	17.9
5	19102	21.4


위의 입력 데이터(Input data)를 소스코드로 처리하면 다음과 같은 결과가 출력된다. (Python 2.5 이상에서 정상적으로 작동한다.)

19003 29.415
19083 29.65
19102 21.4



Advanced Source code

import csv
from collections import defaultdict

def parse_records(lines):
    """Group levels and info strings by zipcode from tab-separated lines.

    ``lines`` is an iterable of text lines (an open file works).  The first
    line is treated as the header and skipped; blank or whitespace-only
    lines are skipped as well, so a stray empty line no longer breaks the
    unpacking below.  Every remaining line must split on tabs into exactly
    four columns -- an ignored id, the zipcode, the level and an info
    string -- otherwise ``ValueError`` is raised; so does a level that
    ``float()`` cannot parse.

    Returns ``(levels_by_zip, info_by_zip)``, two ``defaultdict(list)``
    objects keyed by zipcode: the float levels and the stripped info
    strings, in file order.
    """
    levels_by_zip = defaultdict(list)
    info_by_zip = defaultdict(list)
    for index, line in enumerate(lines):
        # Test the raw line: after split('\t') a blank line is ['\n'],
        # which is truthy and would slip past an emptiness check.
        if index == 0 or not line.strip():
            continue
        _, zipcode, level, info = line.split('\t')
        levels_by_zip[zipcode].append(float(level))
        # the last column still carries the line terminator; strip it
        info_by_zip[zipcode].append(info.strip())
    return levels_by_zip, info_by_zip


def summarize_records(levels_by_zip, info_by_zip):
    """Return ``(zipcode, average level, 'info|info|...')`` sorted by zipcode.

    Sorting makes the output order deterministic instead of depending on
    the dictionary's iteration order.
    """
    return [(zipcode,
             sum(levels) / float(len(levels)),
             '|'.join(info_by_zip[zipcode]))
            for zipcode, levels in sorted(levels_by_zip.items())]


if __name__ == '__main__':
    f = open('python_test.txt', 'r')
    # try/finally (not ``with``) so the file is always closed while the
    # script stays valid on Python 2.5, which lacks a default ``with``.
    try:
        data, data2 = parse_records(f)
    finally:
        f.close()

    for zipcode, average, info in summarize_records(data, data2):
        # '%s' renders the float through str(), as the print statement did
        print('%s %s %s' % (zipcode, average, info))


Reference

<defaultdict>
http://docs.python.org/release/2.5.2/lib/defaultdict-examples.html


<sourcecode>

http://stackoverflow.com/questions/5328971/python-csv-need-to-group-and-calculate-values-based-on-one-key


+ Recent posts