diff --git a/data/bitcoin_data.csv b/data/bitcoin_data.csv new file mode 100644 index 0000000..cb85906 --- /dev/null +++ b/data/bitcoin_data.csv @@ -0,0 +1,453 @@ +Timestamp,Open,High,Low,Close,Volume (BTC),Volume (Currency),Weighted Price +1/1/17 0:00,966.34,1005,960.53,997.75,6850.59,6764742.06,987.47 +1/2/17 0:00,997.75,1032,990.01,1012.54,8167.38,8273576.99,1013 +1/3/17 0:00,1011.44,1039,999.99,1035.24,9089.66,9276500.31,1020.56 +1/4/17 0:00,1035.51,1139.89,1028.56,1114.92,21562.46,23469644.96,1088.45 +1/5/17 0:00,1114.38,1136.72,885.41,1004.74,36018.86,36211399.53,1005.35 +1/6/17 0:00,1004.73,1026.99,871,893.89,27916.7,25523261.28,914.26 +1/7/17 0:00,894.02,907.05,812.28,906.2,20401.11,17624310.02,863.89 +1/8/17 0:00,906.2,941.81,881.3,909.75,8937.49,8168170.35,913.92 +1/9/17 0:00,909.8,912.87,875,896.23,8716.18,7780059.06,892.6 +1/10/17 0:00,896.09,912.47,889.41,905.05,8535.52,7704271.2,902.61 +1/11/17 0:00,905.24,918.4,755,778.62,35893.77,29459968.9,820.75 +1/12/17 0:00,778.7,832.99,751.34,807.47,17400.14,13632251.44,783.46 +1/13/17 0:00,807.51,831.4,775,825.86,11409.52,9224729.89,808.51 +1/14/17 0:00,825.98,837.76,810,818.27,6614.72,5470214.99,826.98 +1/15/17 0:00,819.52,823.45,808,821.86,4231.46,3455365.87,816.59 +1/16/17 0:00,821.86,835,818.09,831.81,6166.04,5107031.21,828.25 +1/17/17 0:00,831.76,908.5,827,905.99,12264.17,10775497.86,878.62 +1/18/17 0:00,905.95,915.99,851.74,887.7,11181.9,9830117.65,879.11 +1/19/17 0:00,887.69,910,878.89,901.01,11094.6,9929647.2,895 +1/20/17 0:00,902.23,902.43,880,895.8,6618.63,5915865.49,893.82 +1/21/17 0:00,895.81,928,895,921.98,5865.63,5373391.08,916.08 +1/22/17 0:00,921.98,937.74,886.76,923.76,7166.67,6569177.19,916.63 +1/23/17 0:00,923.75,927.57,913.21,913.52,3514.74,3234454.3,920.25 +1/24/17 0:00,913.15,923,881.48,885.47,9405.05,8495975.46,903.34 +1/25/17 0:00,885.47,904.01,884.5,893.43,5291.55,4725942.37,893.11 +1/26/17 0:00,893.25,919.49,892.86,914.83,5164.9,4688102.25,907.69 +1/27/17 
0:00,914.95,924.88,908.01,918.02,4830.54,4433746.38,917.86 +1/28/17 0:00,918.02,920.99,913.08,918.6,1175.86,1078884.52,917.53 +1/29/17 0:00,918.6,919.99,910.45,912.01,1349.01,1234746.08,915.29 +1/30/17 0:00,913.12,923,910,919.99,3121.19,2865063.73,917.94 +1/31/17 0:00,919.99,970,917.5,963.99,9507.1,9024041.55,949.19 +2/1/17 0:00,963.99,986.96,962,983.67,6954.4,6748543.97,970.4 +2/2/17 0:00,983.79,1011.65,973.12,1009.17,8269.68,8222686.79,994.32 +2/3/17 0:00,1009.15,1022.64,987.01,1016.99,10928.02,11029881.3,1009.32 +2/4/17 0:00,1015.06,1044.23,1004,1033.18,5752.29,5884560.6,1022.99 +2/5/17 0:00,1033.72,1034.6,1004.7,1011.07,4348.17,4417546.51,1015.96 +2/6/17 0:00,1010.03,1031.94,1005.1,1023.99,5080.85,5188721.52,1021.23 +2/7/17 0:00,1024,1056.25,1021.3,1052.48,5372.13,5605838.11,1043.5 +2/8/17 0:00,1052.48,1070,1025,1050.4,9633.26,10072826.84,1045.63 +2/9/17 0:00,1051.73,1074.69,913.73,986,25150.07,25064862.06,996.61 +2/10/17 0:00,985.71,1010.68,950,996.08,10731.29,10491584.37,977.66 +2/11/17 0:00,995.49,1020,987.62,1012.4,4472.51,4509154.73,1008.19 +2/12/17 0:00,1011.01,1011.97,991.98,1000.73,2684,2690265.37,1002.34 +2/13/17 0:00,1000.73,1007.89,975.1,1000.79,4547.26,4522130.78,994.47 +2/14/17 0:00,1000.74,1019,987.97,1008.88,6761.11,6806110.34,1006.66 +2/15/17 0:00,1008.88,1013.8,1000.97,1011.53,3095.66,3123812.22,1009.09 +2/16/17 0:00,1011.53,1043.21,1010.75,1032.7,5996.1,6170238.86,1029.04 +2/17/17 0:00,1032.91,1064.99,1031.98,1055.46,6085.02,6386285.16,1049.51 +2/18/17 0:00,1055.75,1068.99,1046.14,1056.4,4474.95,4745831.61,1060.53 +2/19/17 0:00,1056.4,1061.9,1039.07,1051.8,2270.85,2388775.41,1051.93 +2/20/17 0:00,1051.8,1089.99,1044.39,1084,3664.37,3902660.35,1065.03 +2/21/17 0:00,1084,1126.86,1077,1124.62,8395.47,9273663.18,1104.6 +2/22/17 0:00,1124.83,1143,1098.16,1130.01,11673.09,13146726.04,1126.24 +2/23/17 0:00,1130.02,1193.92,1120.73,1188.11,11753.24,13558056.03,1153.56 +2/24/17 0:00,1183.53,1220,1091.1,1180.14,19598.23,23023500.2,1174.77 +2/25/17 
0:00,1180.05,1184.73,1127,1152.2,6238.44,7226444.77,1158.37 +2/26/17 0:00,1152.11,1184.97,1133,1179.05,3536.49,4127618.08,1167.15 +2/27/17 0:00,1180.46,1197.99,1169,1194.64,4640.77,5511306.9,1187.58 +2/28/17 0:00,1194.64,1209.99,1176.27,1191.21,7562.47,9017082.51,1192.35 +3/1/17 0:00,1191.16,1231,1188.3,1226.39,6013.07,7276140.33,1210.05 +3/2/17 0:00,1228,1283.25,1215,1257.6,7762.73,9679539.51,1246.92 +3/3/17 0:00,1256.32,1298,1255,1285.33,7545.48,9615648.42,1274.36 +3/4/17 0:00,1287.38,1289.55,1230,1260,4951.23,6237601.9,1259.81 +3/5/17 0:00,1260,1275,1240,1273,3026.02,3809526.44,1258.92 +3/6/17 0:00,1269.98,1285,1251,1278.49,4249.65,5420772.93,1275.58 +3/7/17 0:00,1278.49,1281.62,1175.97,1233.05,14450.04,17887749.61,1237.9 +3/8/17 0:00,1233.86,1245.49,1145.5,1150.05,13399.44,15905595.45,1187.03 +3/9/17 0:00,1149.34,1208,1135.01,1190.89,7654.74,9019820.32,1178.33 +3/10/17 0:00,1190.99,1350,975,1116.97,33467.94,38830456.29,1160.23 +3/11/17 0:00,1111.55,1198,1106.51,1176.55,7346.79,8529224.3,1160.95 +3/12/17 0:00,1176.58,1241.85,1170.9,1226.62,5793.91,7003314.41,1208.74 +3/13/17 0:00,1226.42,1249.02,1215.43,1242.46,5637.36,6963555.14,1235.25 +3/14/17 0:00,1242.46,1260,1226.5,1245.86,5745.5,7156458.55,1245.58 +3/15/17 0:00,1245.82,1260,1238.32,1257.32,3930.5,4920843.36,1251.96 +3/16/17 0:00,1258.75,1258.96,1130,1172.62,15246.55,18265147.56,1197.99 +3/17/17 0:00,1172,1173.23,1063,1071.02,16704.57,18711161.79,1120.12 +3/18/17 0:00,1068.16,1099,944.36,969.4,21292.6,21545879.41,1011.89 +3/19/17 0:00,969.4,1063.7,969.4,1017.97,12201.86,12333561.01,1010.79 +3/20/17 0:00,1012.88,1053,1010.01,1035.96,7497.91,7764665.81,1035.58 +3/21/17 0:00,1036.23,1117.9,1034.92,1114.39,11125.5,12068049.72,1084.72 +3/22/17 0:00,1114.39,1114.39,988,1037.56,14521.26,15061002.63,1037.17 +3/23/17 0:00,1037.6,1054.4,1010.69,1029.65,5917,6134595.75,1036.78 +3/24/17 0:00,1030.84,1032,920,929.06,16072.42,15697051.68,976.65 +3/25/17 0:00,928.1,963.36,891.33,956.02,13507.44,12469646.95,923.17 
+3/26/17 0:00,956.02,997,937.52,960,9012.92,8743359.97,970.09 +3/27/17 0:00,959.08,1043,952.95,1039.92,8687.09,8753826.09,1007.68 +3/28/17 0:00,1039.09,1068.26,1014.9,1043.99,9115.52,9538479.55,1046.4 +3/29/17 0:00,1042.83,1059.43,1008.16,1035.96,9379.72,9675900.4,1031.58 +3/30/17 0:00,1037.29,1050.17,1017.14,1033.7,8645.38,8919386.18,1031.69 +3/31/17 0:00,1033.79,1081,1031.43,1070.31,12058.79,12711331.62,1054.11 +4/1/17 0:00,1071.34,1097,1061.64,1083.94,5209.13,5617315.77,1078.36 +4/2/17 0:00,1083.95,1100,1066.49,1078.01,7639.9,8274688.32,1083.09 +4/3/17 0:00,1076.59,1152,1076.19,1144.77,11076.86,12487637.63,1127.36 +4/4/17 0:00,1146.42,1162.93,1118.85,1143,7420.87,8471842.7,1141.62 +4/5/17 0:00,1143,1145,1111,1134.58,5588.98,6299621.45,1127.15 +4/6/17 0:00,1134.86,1201.37,1134.36,1190.85,9134.3,10690571.43,1170.38 +4/7/17 0:00,1190.72,1202,1176,1190.66,5450.27,6486716.5,1190.16 +4/8/17 0:00,1190.66,1197.3,1166.66,1184.03,2720.8,3224244.36,1185.04 +4/9/17 0:00,1184.05,1216.87,1173.98,1206.2,4817.42,5764973.91,1196.69 +4/10/17 0:00,1205.2,1219.71,1195,1209.25,3656.82,4416613.87,1207.78 +4/11/17 0:00,1209.99,1229,1198.02,1218.99,3487.75,4229830.76,1212.77 +4/12/17 0:00,1218.99,1224.57,1208,1212.17,3475.19,4224875.8,1215.72 +4/13/17 0:00,1212.16,1219.45,1141,1172.91,7135.3,8410788.79,1178.76 +4/14/17 0:00,1172.56,1193,1142,1170.34,8982.77,10475652.88,1166.19 +4/15/17 0:00,1169.53,1192.5,1165,1173.45,1804.45,2128067.82,1179.34 +4/16/17 0:00,1171,1179.3,1150,1162.31,2663.99,3094769.46,1161.7 +4/17/17 0:00,1162.31,1192.5,1161,1176.54,3568.83,4210491.81,1179.8 +4/18/17 0:00,1176.59,1210,1175.95,1202.58,5514.18,6636782.79,1203.58 +4/19/17 0:00,1202.59,1209.99,1190,1203.98,6046.52,7247878.76,1198.69 +4/20/17 0:00,1203.98,1243.88,1201.66,1234.19,5972.94,7302307.33,1222.57 +4/21/17 0:00,1234.93,1252.32,1231.65,1243.61,5023.04,6236859.58,1241.65 +4/22/17 0:00,1243.6,1247.15,1199,1233.2,4173.28,5126960.21,1228.52 +4/23/17 
0:00,1231.85,1248.99,1223.42,1241.99,2604.72,3219987.14,1236.21 +4/24/17 0:00,1242,1254,1234,1253.58,3048.69,3788507.32,1242.67 +4/25/17 0:00,1253.57,1280,1251.07,1269,5249.77,6651231.27,1266.96 +4/26/17 0:00,1271.4,1330,1265,1287.99,8707.64,11266221.16,1293.83 +4/27/17 0:00,1287.92,1342.02,1285,1331.53,6669.99,8810060.91,1320.85 +4/28/17 0:00,1331,1347.02,1299,1330.7,8234.28,10882526.4,1321.61 +4/29/17 0:00,1333.93,1342.8,1316,1333,3927.45,5226089.45,1330.66 +4/30/17 0:00,1336.77,1356,1315.68,1350.21,3536.18,4718587.81,1334.38 +5/1/17 0:00,1348.88,1425,1341.22,1390.86,8604.41,11923132.22,1385.7 +5/2/17 0:00,1390.86,1481.73,1388,1447.75,8920.69,12836209.77,1438.93 +5/3/17 0:00,1447.75,1516.04,1431.02,1503.22,11077.59,16380266.84,1478.69 +5/4/17 0:00,1503.25,1623.01,1444.94,1537.23,19548.53,30115422.58,1540.55 +5/5/17 0:00,1537.36,1609,1495,1514.9,15603.68,24257672.5,1554.61 +5/6/17 0:00,1515.39,1578.97,1505,1558.02,7901.14,12179087.26,1541.43 +5/7/17 0:00,1554.04,1565,1525,1554.01,7667.79,11809645.33,1540.16 +5/8/17 0:00,1554.38,1653.97,1554.38,1649.54,19262.35,30652223.06,1591.3 +5/9/17 0:00,1649.55,1760.4,1616.05,1720.28,16807.73,28741490.64,1710.02 +5/10/17 0:00,1720.26,1794.21,1682,1772.58,11265.38,19697560.66,1748.5 +5/11/17 0:00,1774.52,1892,1745.01,1828.45,14740.22,26997283.11,1831.54 +5/12/17 0:00,1830.85,1832,1650,1691.51,17062.84,29856372.54,1749.79 +5/13/17 0:00,1690.33,1785.82,1601.46,1776.89,10532.23,17797259.46,1689.79 +5/14/17 0:00,1776.8,1815,1753,1784,4816.73,8585557.39,1782.44 +5/15/17 0:00,1782.3,1784,1682.56,1705.48,13316.8,22951575.79,1723.51 +5/16/17 0:00,1706.07,1750,1641.22,1700.01,16554.09,28255380.02,1706.85 +5/17/17 0:00,1700.01,1840.42,1670.01,1782.99,17797.32,31598676.87,1775.47 +5/18/17 0:00,1779,1881.8,1766.33,1878.99,11916.12,21766168.13,1826.61 +5/19/17 0:00,1879,1969.99,1874.99,1958,14124.62,27218796.92,1927.05 +5/20/17 0:00,1959.09,2021,1914,2013.99,12309.95,24248476.49,1969.83 +5/21/17 
0:00,2013.99,2063.11,1961.52,2017.55,6755.83,13647083.74,2020.04 +5/22/17 0:00,2017.55,2230.34,2001,2099.97,16338.54,34535101.53,2113.72 +5/23/17 0:00,2088.76,2275,2088.45,2264.23,14138.92,31139771.52,2202.42 +5/24/17 0:00,2265.9,2481.75,2264.64,2420.29,22144.6,52782234.68,2383.53 +5/25/17 0:00,2420.29,2760.1,2221,2292.53,32964.76,83028907.05,2518.72 +5/26/17 0:00,2277.01,2599.12,2060,2279.82,28326.82,66466402.58,2346.41 +5/27/17 0:00,2280.7,2339.94,1850,2042,26216.28,54113571.78,2064.12 +5/28/17 0:00,2042.89,2313.96,2041.48,2178.81,15375.22,33655092.72,2188.92 +5/29/17 0:00,2172.54,2350,2110,2292.1,12129.28,27376000.71,2257.02 +5/30/17 0:00,2290.18,2339.08,2160,2203.51,13888.85,31434633.58,2263.3 +5/31/17 0:00,2201.7,2334,2154.28,2298.01,16279.29,36862481.13,2264.38 +6/1/17 0:00,2298.01,2456.39,2296.81,2413.63,18401.66,43990417.87,2390.57 +6/2/17 0:00,2413.58,2489,2370.03,2488.94,12428.7,30186115.46,2428.74 +6/3/17 0:00,2488.94,2584.34,2445,2540.94,10810.58,27295110.64,2524.85 +6/4/17 0:00,2538.1,2569,2460,2530.27,8828.62,22263365.05,2521.73 +6/5/17 0:00,2530.27,2699.35,2525.28,2698,12221.44,31919212.32,2611.74 +6/6/17 0:00,2698,2933,2689.4,2880.74,26695.67,75249106.78,2818.78 +6/7/17 0:00,2875.71,2893.28,2612.5,2683.03,16967.04,47205426.44,2782.18 +6/8/17 0:00,2680.24,2815,2613.13,2806,13251.41,36430027.3,2749.14 +6/9/17 0:00,2805.46,2868.23,2780.43,2822.32,8036.31,22701156.24,2824.82 +6/10/17 0:00,2818,2912.98,2800,2899.99,9593.36,27434766.12,2859.77 +6/11/17 0:00,2899.01,2967.46,2861.53,2954.22,6830.61,19889995.68,2911.89 +6/12/17 0:00,2954.23,2980,2480.03,2667.06,31534.95,85799286.22,2720.77 +6/13/17 0:00,2656.97,2783.62,2638.62,2703.02,16275.83,44217458.2,2716.76 +6/14/17 0:00,2705.99,2801,2320,2450,29478.82,76114991.86,2582.02 +6/15/17 0:00,2451.42,2516.2,2120,2424.91,36751.09,84952914.56,2311.58 +6/16/17 0:00,2424.96,2540,2307.66,2484.68,13802.39,33831741.34,2451.15 +6/17/17 0:00,2484.68,2674.4,2420.53,2630,11142.99,28569796.41,2563.93 +6/18/17 
0:00,2630.06,2665,2460.62,2516.98,9424.23,24023680.66,2549.14 +6/19/17 0:00,2516.98,2598,2480.96,2598,10570.67,27011860.51,2555.36 +6/20/17 0:00,2598,2783,2581,2740,15891.7,42538164.74,2676.75 +6/21/17 0:00,2734.03,2789,2611,2657.04,15676.32,42304860.31,2698.65 +6/22/17 0:00,2658.67,2740,2594.61,2713.48,10792.79,28955845.86,2682.89 +6/23/17 0:00,2713.48,2745,2657.46,2685.05,8232.48,22348019.56,2714.62 +6/24/17 0:00,2685.05,2724.96,2512.13,2557.66,13342.82,34944802.73,2619 +6/25/17 0:00,2557.66,2635.72,2432,2502.03,10339.57,26249953.89,2538.78 +6/26/17 0:00,2502.26,2550.03,2315.01,2421.22,22094.21,54096000.65,2448.42 +6/27/17 0:00,2421.21,2565,2291,2565,19129.6,45774590.05,2392.87 +6/28/17 0:00,2565,2593,2468.01,2559.9,12274.55,31106035.77,2534.19 +6/29/17 0:00,2550.99,2594.78,2500.31,2541.59,8125.38,20771078.34,2556.32 +6/30/17 0:00,2538.41,2564.58,2451,2465.49,8715.72,21948643.03,2518.28 +7/1/17 0:00,2465.48,2517.11,2390,2412.41,8885.89,21750491.24,2447.76 +7/2/17 0:00,2421.3,2528.72,2374.34,2504.37,8744.53,21386273.68,2445.67 +7/3/17 0:00,2504.81,2595,2472.68,2550.47,11520.71,29213705.74,2535.76 +7/4/17 0:00,2540.07,2639.47,2540.07,2596.12,9662.76,25107482.88,2598.38 +7/5/17 0:00,2594.52,2623,2530.1,2602.9,10882.84,27950344.7,2568.3 +7/6/17 0:00,2598.37,2614,2522,2600.39,8333.98,21529601.71,2583.35 +7/7/17 0:00,2599.01,2605,2475,2501.46,9430.62,23870564.36,2531.18 +7/8/17 0:00,2501.46,2555,2462,2550.07,5405.89,13584489.52,2512.9 +7/9/17 0:00,2550.13,2564.65,2500.5,2502.28,4483.14,11362427.97,2534.48 +7/10/17 0:00,2504,2527.88,2261.85,2323.45,17296.34,41650843.77,2408.07 +7/11/17 0:00,2326.12,2399,2242.62,2305.98,17580.96,41145883.94,2340.37 +7/12/17 0:00,2301.55,2408.84,2239.54,2388,12883.55,30256255.57,2348.44 +7/13/17 0:00,2383.97,2425.16,2312.93,2339.99,8429.85,19957908.68,2367.53 +7/14/17 0:00,2340,2357.84,2140,2213.37,12659.02,28326253.11,2237.63 +7/15/17 0:00,2209.44,2219.99,1967.65,1970.51,19886.68,40927009.37,2058.01 +7/16/17 
0:00,1970.51,2044.44,1830,1917.63,25562.71,49010482.23,1917.27 +7/17/17 0:00,1917.72,2229.97,1910.57,2226,26448.5,55057438.35,2081.68 +7/18/17 0:00,2226,2392,2164,2303.71,25204.49,57683997.07,2288.64 +7/19/17 0:00,2308.82,2402.5,2223,2265.21,17009.88,39248446.86,2307.39 +7/20/17 0:00,2265.51,2938,2265.51,2875.03,30989.89,80388738.23,2594.03 +7/21/17 0:00,2880,2887.45,2611.39,2670,20724.16,56456355.34,2724.18 +7/22/17 0:00,2667.01,2882,2644.59,2832.71,12819.63,35805144.41,2792.99 +7/23/17 0:00,2824.82,2855,2640,2749.02,11001.96,30310788.74,2755.03 +7/24/17 0:00,2756.8,2800,2701,2759.98,10001.92,27562701.02,2755.74 +7/25/17 0:00,2759.95,2775.79,2450,2564.82,21326.55,55120720.9,2584.61 +7/26/17 0:00,2564.74,2608.96,2400,2524.99,17713.67,44091449.21,2489.12 +7/27/17 0:00,2524.99,2695,2509.63,2666.33,11826.48,30470923.86,2576.5 +7/28/17 0:00,2666.33,2825,2655.82,2777.01,14069.5,38675840.62,2748.91 +7/29/17 0:00,2777.01,2785.65,2631.78,2680.56,10782.22,29159800.95,2704.43 +7/30/17 0:00,2681.6,2774.45,2569.69,2742.37,9678.16,25848893.08,2670.85 +7/31/17 0:00,2745.76,2889.99,2680.01,2855.81,11114.34,30777296.21,2769.15 +8/1/17 0:00,2855.81,2929.17,2615,2731,12525.08,34322796.56,2740.33 +8/2/17 0:00,2732,2760,2650,2703.51,9486.63,25701106.1,2709.19 +8/3/17 0:00,2703.51,2807.44,2698.83,2793.37,7963.7,21938304,2754.79 +8/4/17 0:00,2793.34,2877.52,2765.91,2855,7635.82,21650087.64,2835.33 +8/5/17 0:00,2851.01,3339.66,2848.32,3263.62,16996.27,53861926.03,3169.04 +8/6/17 0:00,3263.51,3296.51,3146.1,3222.75,5998.74,19412655.97,3236.12 +8/7/17 0:00,3216.78,3430,3186,3387.55,12046.12,40225852,3339.32 +8/8/17 0:00,3387.54,3490,3300,3412.41,15835.37,54054824.25,3413.55 +8/9/17 0:00,3408.46,3423.1,3178.72,3342.99,14286.84,47356464.4,3314.69 +8/10/17 0:00,3342.99,3448,3311.17,3413.03,9031.12,30658440.51,3394.75 +8/11/17 0:00,3410,3705,3390.67,3645.06,11927.37,41888022.26,3511.92 +8/12/17 0:00,3651.74,3934,3586.95,3855.1,12351.07,46799798.4,3789.13 +8/13/17 
0:00,3855.04,4190,3841.71,4053.87,15889.83,63672103.45,4007.1 +8/14/17 0:00,4053.87,4329.43,3964.96,4306.23,14212.3,59836002.81,4210.15 +8/15/17 0:00,4320.95,4400,3800,4155.67,25515.72,104651369.1,4101.45 +8/16/17 0:00,4154.99,4379.78,3926.06,4378.84,12923.64,54194325.8,4193.43 +8/17/17 0:00,4361.99,4480,4167.21,4276.5,14573.19,63228604.19,4338.69 +8/18/17 0:00,4260.47,4368,3964.96,4100,17516.99,73224029.24,4180.17 +8/19/17 0:00,4100,4188,3900,4099.55,15036.18,60604905.58,4030.6 +8/20/17 0:00,4091.99,4125.95,4000,4058.68,6237.97,25289638.6,4054.14 +8/21/17 0:00,4058.64,4080,3949.78,3987.52,9782.06,39203908.96,4007.74 +8/22/17 0:00,3987.51,4139.31,3600,4085,23522.76,91600325.72,3894.12 +8/23/17 0:00,4078,4248.97,4051.94,4108.12,14979.4,62117291.56,4146.85 +8/24/17 0:00,4121.78,4350,4082.57,4300.34,10782.69,45174068.67,4189.5 +8/25/17 0:00,4308.8,4449.98,4270,4355.98,9699.61,42274063.39,4358.33 +8/26/17 0:00,4348.17,4369.78,4232.43,4333.38,6559.67,28227915.28,4303.25 +8/27/17 0:00,4333.38,4393.3,4290.32,4337.68,3979.12,17263967.23,4338.63 +8/28/17 0:00,4329.91,4399.72,4169.01,4379.99,8641,37077203.39,4290.85 +8/29/17 0:00,4385,4649.78,4336.26,4578.82,11879.64,53491168.4,4502.76 +8/30/17 0:00,4578.82,4642.22,4479,4573.2,8720.04,39800539.16,4564.26 +8/31/17 0:00,4573.15,4765.21,4566.66,4734.26,8911.41,41726210.49,4682.33 +9/1/17 0:00,4734.26,4935,4671.09,4921.7,15367.53,73889667.5,4808.17 +9/2/17 0:00,4921.71,4979.9,4488.5,4599.9,16977.79,79492706.8,4682.16 +9/3/17 0:00,4599.9,4700,4385,4606.26,11224.91,51112035,4553.45 +9/4/17 0:00,4603.68,4613.97,4058.5,4277,25069.13,107792606.9,4299.82 +9/5/17 0:00,4282.53,4474.88,4001.93,4396.52,20884.22,88444473.15,4234.99 +9/6/17 0:00,4397.38,4649.23,4356.42,4605.8,16634.1,75306805.37,4527.25 +9/7/17 0:00,4605.81,4674.34,4475,4615,9254.85,42435328.99,4585.2 +9/8/17 0:00,4615,4679.97,4125.99,4312,23693.3,103569423.2,4371.25 +9/9/17 0:00,4327.41,4379.78,4164,4308.72,9974.39,42689334.62,4279.9 +9/10/17 
0:00,4322.44,4322.44,4010,4226.22,14724.2,61040452.05,4145.59 +9/11/17 0:00,4226.21,4353.49,4092.4,4207.31,11191.6,47148979.98,4212.89 +9/12/17 0:00,4198.89,4377.65,4080,4172.56,13294.44,56268039.79,4232.45 +9/13/17 0:00,4170.6,4179.14,3720.01,3865.34,29636.38,115516897.1,3897.81 +9/14/17 0:00,3861.89,3921.74,3210,3227.79,41319.4,146101729.7,3535.91 +9/15/17 0:00,3227.79,3820,2972.01,3700.01,60278.95,203168386,3370.47 +9/16/17 0:00,3699.89,3872.9,3500,3678.93,23158.23,84905914.76,3666.34 +9/17/17 0:00,3669.07,3772.52,3463,3662.99,11770.22,42657776.56,3624.21 +9/18/17 0:00,3662.95,4122.7,3659.42,4101.6,17207.6,67988469.46,3951.07 +9/19/17 0:00,4102,4119.7,3848.35,3888.8,15278.93,60404491.57,3953.45 +9/20/17 0:00,3888.8,4050,3820.56,3874.46,11660.27,45847238.86,3931.92 +9/21/17 0:00,3864,3914,3573,3617.05,17904.3,67012342.18,3742.81 +9/22/17 0:00,3607.83,3761.84,3514,3612.18,15007.74,54271184.82,3616.21 +9/23/17 0:00,3611.91,3810.25,3552,3779.17,10123.23,37609099.77,3715.13 +9/24/17 0:00,3781.13,3783.25,3615,3664.22,6508.72,24009707.82,3688.85 +9/25/17 0:00,3667.01,3968.59,3658.39,3918,14979.78,57632408.09,3847.35 +9/26/17 0:00,3920.66,3970,3850.61,3888.03,10014.62,39220959.7,3916.37 +9/27/17 0:00,3883.95,4226.73,3870,4199.29,15075.84,61255648.53,4063.17 +9/28/17 0:00,4202.34,4270.01,4123.5,4184.84,9625.93,40309035.77,4187.55 +9/29/17 0:00,4183.47,4227.62,4022.02,4164.82,12191.93,50336312.65,4128.66 +9/30/17 0:00,4162.04,4349,4154.28,4326.09,7496.78,32073448.82,4278.3 +10/1/17 0:00,4326.09,4377.22,4216,4377.22,7211.34,30871704.79,4281 +10/2/17 0:00,4369.33,4453,4352,4391.48,8259.82,36293751.01,4394.01 +10/3/17 0:00,4391.41,4425,4218,4315.83,12468.06,53551605.48,4295.1 +10/4/17 0:00,4315.73,4343,4170.07,4219.53,8518.99,36068254.94,4233.86 +10/5/17 0:00,4219.74,4358.97,4137.96,4301.09,8419.96,35912272.84,4265.13 +10/6/17 0:00,4301.09,4425,4278.4,4362.95,8377.3,36552658.15,4363.3 +10/7/17 0:00,4356.09,4463,4312.82,4423.3,4646.41,20320065.24,4373.29 +10/8/17 
0:00,4422.72,4612,4404.5,4597.98,10083.87,45490443.57,4511.21 +10/9/17 0:00,4597.97,4865,4541,4764.7,12923.67,60473207.29,4679.26 +10/10/17 0:00,4761.67,4909.97,4700,4749.29,11666.02,56088789.35,4807.87 +10/11/17 0:00,4747.9,4869.78,4700,4822.01,9866.9,47256315.42,4789.38 +10/12/17 0:00,4822.01,5445,4793.66,5445,19916.23,102559923.2,5149.57 +10/13/17 0:00,5444,5846.43,5380,5653.6,27827.38,155848051.9,5600.53 +10/14/17 0:00,5653.58,5817.34,5558.26,5801.29,8410.94,47779356.45,5680.62 +10/15/17 0:00,5795.01,5830,5415,5679.7,11542.95,64464008.42,5584.71 +10/16/17 0:00,5683.92,5807.11,5548,5745.72,9066.8,51508749.11,5681.03 +10/17/17 0:00,5752.2,5776.31,5506.78,5597.31,9105.28,51035573.57,5605.05 +10/18/17 0:00,5591.97,5617.75,5101.36,5582.05,18961.68,101657249.6,5361.2 +10/19/17 0:00,5572.64,5735.14,5512.06,5698.69,9919.84,56087316.56,5654.06 +10/20/17 0:00,5700,6074,5589.8,5977.29,15109.06,89014299.88,5891.45 +10/21/17 0:00,5977.26,6180,5871,6013.46,15357.42,92912504.83,6050.01 +10/22/17 0:00,6010.85,6071.07,5700,5969,12232.52,71898806.16,5877.68 +10/23/17 0:00,5967.38,6045.34,5617.75,5871.17,15331.64,89530043.16,5839.56 +10/24/17 0:00,5869.82,5870,5453.01,5523.4,17286.72,97560230.19,5643.65 +10/25/17 0:00,5518.49,5748,5366,5735.88,12349.11,68370411.13,5536.47 +10/26/17 0:00,5741.35,5988,5683,5890,11788.3,69079451.81,5860 +10/27/17 0:00,5889.99,5994.06,5674.06,5771.89,11911.04,69053794.44,5797.46 +10/28/17 0:00,5773.03,5875.66,5646.18,5730.69,5926.3,33993472.69,5736.04 +10/29/17 0:00,5731.7,6316.85,5683,6137.37,16086.63,96361934.29,5990.19 +10/30/17 0:00,6133.01,6229.77,6024.03,6119.99,9574.64,58637080.14,6124.2 +10/31/17 0:00,6120,6449.78,6072.81,6434.21,13423.31,84354477.98,6284.18 +11/1/17 0:00,6434.24,6756.36,6340.01,6741.59,12967.22,84891427.3,6546.62 +11/2/17 0:00,6745.04,7354.1,6700,7030,26514.65,184979262.3,6976.49 +11/3/17 0:00,7030,7500,6925.22,7146.82,15999.78,116037544.2,7252.44 +11/4/17 0:00,7145.24,7569.9,6994,7388.83,9200.11,66730277.42,7253.2 +11/5/17 
0:00,7388.79,7590,7275.16,7372.72,9218.33,68631151.85,7445.08 +11/6/17 0:00,7373.3,7421.39,6922.07,6967.68,19323.13,138542391.8,7169.77 +11/7/17 0:00,6967.64,7244.69,6945,7130.28,11634.27,82632999.59,7102.55 +11/8/17 0:00,7131.38,7888,7080.01,7450.32,24679.72,183501327.1,7435.31 +11/9/17 0:00,7465.06,7490,7061.2,7148,14799.27,106985408.5,7229.1 +11/10/17 0:00,7150.61,7343.18,6429.44,6588.18,26614.28,181284361.1,6811.55 +11/11/17 0:00,6588.18,6820,6218,6355.13,14942.7,96800268.09,6478.1 +11/12/17 0:00,6355.13,6488.88,5555.55,5870.37,37315.16,225565297.5,6044.87 +11/13/17 0:00,5870.03,6775.75,5846,6525.17,27202.28,173971383.6,6395.47 +11/14/17 0:00,6524.69,6750,6466.88,6609,12072.05,79508390.54,6586.15 +11/15/17 0:00,6609,7350,6609,7294,16869.9,119171192.4,7064.13 +11/16/17 0:00,7294,7976.79,7120.85,7846.96,19272.37,145738327.5,7562.03 +11/17/17 0:00,7846.96,7997,7528.5,7674.99,17714.37,138124189.4,7797.3 +11/18/17 0:00,7675,7858,7431.54,7771.03,7326.81,56264967.21,7679.33 +11/19/17 0:00,7775.55,8087.35,7675,8016.58,8307.78,65332064.65,7863.96 +11/20/17 0:00,8016.58,8269.99,7900,8226.17,8179.46,66366007.81,8113.74 +11/21/17 0:00,8226.14,8354.46,7770,8095.23,13686.51,110811263.5,8096.38 +11/22/17 0:00,8095.19,8310.89,8045.76,8214.69,8078.58,66215711.71,8196.45 +11/23/17 0:00,8214.69,8279.76,7980,7989,7811.79,63644740.39,8147.26 +11/24/17 0:00,7988.96,8340,7876,8199.19,9289.99,75751868.75,8154.14 +11/25/17 0:00,8199.83,8737,8114.78,8717.99,11611.67,97628924.79,8407.83 +11/26/17 0:00,8718,9366.6,8538.2,9271.06,12021.22,108525815.5,9027.86 +11/27/17 0:00,9278.99,9721.7,9267,9708.07,13272.45,126458117.7,9527.86 +11/28/17 0:00,9708.06,9968,9582.25,9868.82,11214.93,110496755.9,9852.65 +11/29/17 0:00,9877.63,11395,9250,9824.68,33432.34,346949562.2,10377.66 +11/30/17 0:00,9833.7,10618.29,9000,9947.67,25433.46,247093850.8,9715.31 +12/1/17 0:00,9927.29,10949.89,9370.11,10840.45,16708.03,171881760.6,10287.37 +12/2/17 
0:00,10840.45,11200,10637.69,10872,9267.16,101270084.4,10927.84 +12/3/17 0:00,10875.68,11800.01,10513.16,11250,14238.53,160176290.4,11249.5 +12/4/17 0:00,11250,11613.07,10850,11613.07,13621.48,154122917.9,11314.7 +12/5/17 0:00,11613.07,11850,11384.25,11677,11875.03,138370076.2,11652.18 +12/6/17 0:00,11676.99,13700,11659.8,13623.5,19784.87,250560790.3,12664.26 +12/7/17 0:00,13623,16615.62,13085.9,16599.99,25787.68,382694044.1,14840.19 +12/8/17 0:00,16599.99,16666.66,13482.42,15800,25473.4,389366035.1,15285.2 +12/9/17 0:00,15799.87,15998.5,12701.05,14607.49,16587.47,238909072.6,14402.98 +12/10/17 0:00,14601.01,15385,13011,14691,18487.98,263150668,14233.61 +12/11/17 0:00,14690.99,17270,14677.19,16470,16583.72,270203844.3,16293.32 +12/12/17 0:00,16470,17428.42,15967.29,16650.01,13517.89,227842348.7,16854.87 +12/13/17 0:00,16650.01,17107.03,15497.69,16250,17136.71,281640493.8,16434.93 +12/14/17 0:00,16245.02,16830.45,15852.69,16404.99,13409.52,219263192.5,16351.3 +12/15/17 0:00,16404.99,17934,16337.19,17471.5,18998.39,329337014.8,17335 +12/16/17 0:00,17477.98,19377,17269.99,19187.78,9761.22,179906034.6,18430.69 +12/17/17 0:00,19187.78,19666,18465,18953,9749.25,186310530.8,19110.24 +12/18/17 0:00,18953,19220,17835.2,18940.57,14678.94,273533539.4,18634.42 +12/19/17 0:00,18940.58,19160.79,16831.26,17700,21528.14,387206242.4,17986.05 +12/20/17 0:00,17700,17950,15343.04,16466.98,31172.23,521350550.6,16724.84 +12/21/17 0:00,16466.98,17281.17,15005,15600.01,20377.86,326730689.7,16033.61 +12/22/17 0:00,15600,15795.61,11159.93,14009.79,57444.89,772143020.6,13441.46 +12/23/17 0:00,13980,15756.22,13496.48,14619,21786.41,322120599.6,14785.39 +12/24/17 0:00,14619,14619.1,12488,14157.87,18519.49,250476423.6,13525.02 +12/25/17 0:00,14107.87,14650,13210,13911.28,11564.54,161347357.1,13951.9 +12/26/17 0:00,13925.5,16147.87,13746.95,15764.44,15051.16,230922569.2,15342.51 +12/27/17 0:00,15764.45,16480.52,14484,15364.93,15643.75,241860985.1,15460.55 +12/28/17 
0:00,15390.05,15474.19,13500,14470.07,16557.22,234428578.5,14158.69 +12/29/17 0:00,14436.99,15111,13998,14340,13505.7,195727638.8,14492.22 +12/30/17 0:00,14351,14463.28,12050,12640,21749.67,285985550.3,13148.96 +12/31/17 0:00,12640,14296.06,12491.21,13880,11583.42,154552644.5,13342.58 +1/1/18 0:00,13880,13941.75,12801.38,13443.41,7688.03,102815718.5,13373.48 +1/2/18 0:00,13394.2,15257.53,12910.58,14678.94,16299.67,229138844.1,14057.88 +1/3/18 0:00,14670.96,15500,14546.28,15155.62,12275,183639413.7,14960.44 +1/4/18 0:00,15155.62,15430.27,14192.37,15143.67,15004.02,222160244.5,14806.72 +1/5/18 0:00,15143.67,17200,14810,16928,16248.91,259695892.9,15982.35 +1/6/18 0:00,16927.99,17234.99,16220,17149.67,9501.02,158389594.5,16670.8 +1/7/18 0:00,17142.43,17149.97,15707.16,16124.02,8632.81,141555094.2,16397.33 +1/8/18 0:00,16173.98,16300,13900,14999.99,16676.35,251877784.1,15103.89 +1/9/18 0:00,14999.99,15367.18,14123.97,14403.51,13913.52,204579121.6,14703.62 +1/10/18 0:00,14403.51,14900,13412,14890.02,18479.01,261210687,14135.53 +1/11/18 0:00,14899.99,14973.07,12800,13243.83,19630.08,266846190.8,13593.74 +1/12/18 0:00,13249.99,14152.19,12807.27,13781.41,13433.08,183399343.5,13652.81 +1/13/18 0:00,13829.28,14619.1,13789.42,14197.78,7488.99,106595650.1,14233.65 +1/14/18 0:00,14197.78,14365.81,13072.22,13647.99,7588.64,102783826.7,13544.44 +1/15/18 0:00,13647.99,14394.36,13429.25,13607.04,9444.64,131049852.4,13875.59 +1/16/18 0:00,13581.66,13607.04,10162,11386.34,38789.88,457725763.4,11800.13 +1/17/18 0:00,11393.97,11794.07,9222,11191.35,41356.19,433168120.1,10474.08 +1/18/18 0:00,11199,12146,10693,11247.57,22195.26,256082650,11537.72 +1/19/18 0:00,11290.9,12050.39,11025.18,11552,13203.26,151834406.4,11499.77 +1/20/18 0:00,11560.82,13052.12,11515.94,12775.99,10324.06,128912075.5,12486.57 +1/21/18 0:00,12782.99,12791.88,11100,11558.87,11596.44,137150604.6,11826.96 +1/22/18 0:00,11558.87,11910.78,10028.41,10808.99,17067.73,186028930.6,10899.45 +1/23/18 
0:00,10810,11409.87,9927.54,10851.82,17250.18,184369293.7,10687.97 +1/24/18 0:00,10848.99,11500,10488.13,11400.96,11646.79,128825119.7,11061 +1/25/18 0:00,11400.98,11741.82,10868.57,11155.54,10212.92,115421975,11301.56 +1/26/18 0:00,11140.01,11635,10263.32,11092.95,15074.67,164020352.5,10880.53 +1/27/18 0:00,11093.74,11630.47,10815.84,11446.54,10309.81,116376809.5,11287.97 +1/28/18 0:00,11446.54,11989.15,11360.52,11685.58,9956.09,116531522.5,11704.54 +1/29/18 0:00,11685.58,11820.01,10991,11162.62,9996.36,112806531.8,11284.76 +1/30/18 0:00,11162.62,11222.36,9731.2,9971,21082.19,219169216.4,10395.94 +1/31/18 0:00,9971,10324,9514.96,10149,12743.51,126723437.3,9944.16 +2/1/18 0:00,10148.99,10187.56,8455,8998.99,26356.59,243067799.6,9222.28 +2/2/18 0:00,9010.87,9096.79,7625.25,8838.83,44406.02,373423805.1,8409.31 +2/3/18 0:00,8838.29,9491.2,8170.71,9225.86,16208.37,144713399.5,8928.31 +2/4/18 0:00,9225.31,9350.09,7825,8191,19110.67,162005632.6,8477.23 +2/5/18 0:00,8190.78,8335.56,6600,6874.27,46544.43,342210634.7,7352.34 +2/6/18 0:00,6878.65,8150,5920.72,7737.37,70961.37,487504446,6870 +2/7/18 0:00,7737.26,8649,7213.8,7588.01,32640.89,260674865.8,7986.14 +2/8/18 0:00,7588.01,8644.36,7565.5,8259.76,22746.39,187157997.7,8228.03 +2/9/18 0:00,8259.42,8779.62,7753.32,8693.98,18418.12,152040059.9,8254.92 +2/10/18 0:00,8693.98,9090.8,8170.86,8560,14670.56,127082570.7,8662.42 +2/11/18 0:00,8560,8560,7820,8067,12711.79,103542540.6,8145.4 +2/12/18 0:00,8077.25,8995,8067,8899,15071.04,130557481.6,8662.8 +2/13/18 0:00,8891.2,8951.89,8360.13,8522.99,11374.52,97689771.64,8588.47 +2/14/18 0:00,8504.57,9515,8504.57,9490.98,17468.74,158839393.3,9092.78 +2/15/18 0:00,9490.98,10234,9350,10018,21036.34,206814744.9,9831.31 +2/16/18 0:00,10011.3,10300,9707.51,10196,11857.64,118860367.7,10023.95 +2/17/18 0:00,10203.14,11135.83,10053.5,11101,14511.17,155314935.2,10703.13 +2/18/18 0:00,11101,11300,10153.15,10421.06,17623.08,188724919.3,10708.96 +2/19/18 
0:00,10433.68,11262.48,10307.51,11173,11698.26,127802568.1,10924.92 +2/20/18 0:00,11159.12,11780,11080.37,11233.41,15880.91,182156478,11470.15 +2/21/18 0:00,11233.42,11275.74,10256,10449.4,19959.68,215353691.9,10789.44 +2/22/18 0:00,10446.79,10935,9731.2,9843.34,20204.69,206051152.4,10198.18 +2/23/18 0:00,9843.33,10405.3,9600,10166.1,16044.04,160782973.1,10021.35 +2/24/18 0:00,10135.2,10540.63,9373.48,9689.99,13972.71,137805304.5,9862.46 +2/25/18 0:00,9688.26,9883.41,9260,9590.04,11037.18,105367249.3,9546.58 +2/26/18 0:00,9595.99,10461.97,9376.34,10324.7,16156.81,162306362.4,10045.7 +2/27/18 0:00,10329.99,10850,10150,10566.3,10408.48,110086762.8,10576.64 +2/28/18 0:00,10572.49,11064.75,10255.07,10314.9,10490.75,111457734.8,10624.38 +3/1/18 0:00,10314.99,11090,10223.41,10903.13,9481.67,101712406.7,10727.27 +3/2/18 0:00,10917.37,11175,10774.01,11029.99,8329.1,91455982.5,10980.3 +3/3/18 0:00,11032.81,11503.24,11022.85,11445,7786.27,88241259.78,11332.93 +3/4/18 0:00,11445,11511,11054.91,11463.27,6831.63,77060624.28,11279.98 +3/5/18 0:00,11479.68,11688,11383.66,11419.24,9933.68,114638102.2,11540.35 +3/6/18 0:00,11417.39,11420.01,10560.19,10723.76,14734.42,161507885.7,10961.27 +3/7/18 0:00,10723.01,10911.78,9450,9913.03,26116.51,264099759.8,10112.37 +3/8/18 0:00,9904.52,10150,9078.95,9285.32,22522.43,217370120.9,9651.27 +3/9/18 0:00,9293.06,9420.39,8366,9230,29014.17,257069817.6,8860.15 +3/10/18 0:00,9228.83,9514.96,8697,8791.47,12886.59,118146982.5,9168.21 +3/11/18 0:00,8795.04,9768.37,8450,9535.04,16407.54,149758874.8,9127.44 +3/12/18 0:00,9535.04,9892,8742.07,9120.75,18751.02,175103380.1,9338.34 +3/13/18 0:00,9115.24,9482.79,8830,9142.32,16001.48,146461539.3,9153 +3/14/18 0:00,9152.07,9356.14,7948,8196.69,21191.02,182210730.9,8598.49 +3/15/18 0:00,8195.99,8425,7682,8265.05,18387.61,148610946.9,8082.12 +3/16/18 0:00,8265.05,8613.06,7914.08,8258.54,16498.62,137219222.8,8317.01 +3/17/18 0:00,8258.54,8356.4,7730.23,7860.83,12048.67,97294029.18,8075.09 +3/18/18 
0:00,7860.83,8324.92,7325.37,8188.24,19547.36,150315447.6,7689.81 +3/19/18 0:00,8205.55,8718.74,8114.17,8596.93,19396.09,162448296.2,8375.31 +3/20/18 0:00,8596.79,9051,8313.01,8904.02,13454.82,116496171,8658.32 +3/21/18 0:00,8904.02,9188.1,8754.83,8893.79,12101,108594771.1,8974.03 +3/22/18 0:00,8892.18,9099.59,8503.52,8704.67,12587.63,109923828.1,8732.68 +3/23/18 0:00,8708.52,8920.79,8265,8920.79,13749.78,117134344.5,8518.99 +3/24/18 0:00,8917.99,9020,8505,8547,9731.98,85811618.32,8817.49 +3/25/18 0:00,8541.96,8680,8368.63,8453.9,9155.91,78013217.69,8520.53 +3/26/18 0:00,8451.12,8500,7831.15,8149.66,17693.42,145246518.5,8209.07 +3/27/18 0:00,8152.26,8211.62,7742.11,7791.7,12385.25,98488415.19,7952.07 +3/28/18 0:00,7791.69,8104.98,7723.03,8039.86,4732.51,37497616.99,7923.4 \ No newline at end of file diff --git a/data/btc-market-price.csv b/data/btc-market-price.csv new file mode 100644 index 0000000..4046108 --- /dev/null +++ b/data/btc-market-price.csv @@ -0,0 +1,365 @@ +2017-04-02 00:00:00,1099.169125 +2017-04-03 00:00:00,1141.813 +2017-04-04 00:00:00,1141.6003625 +2017-04-05 00:00:00,1133.0793142857142 +2017-04-06 00:00:00,1196.3079375 +2017-04-07 00:00:00,1190.45425 +2017-04-08 00:00:00,1181.1498375 +2017-04-09 00:00:00,1208.8005 +2017-04-10 00:00:00,1207.744875 +2017-04-11 00:00:00,1226.6170375 +2017-04-12 00:00:00,1218.92205 +2017-04-13 00:00:00,1180.0237125 +2017-04-14 00:00:00,1185.2600571428572 +2017-04-15 00:00:00,1184.8806714285713 +2017-04-16 00:00:00,1186.9274125 +2017-04-17 00:00:00,1205.634875 +2017-04-18 00:00:00,1216.1867428571427 +2017-04-19 00:00:00,1217.9300875 +2017-04-20 00:00:00,1241.6863250000001 +2017-04-21 00:00:00,1258.3614125 +2017-04-22 00:00:00,1261.311225 +2017-04-23 00:00:00,1257.9881125 +2017-04-24 00:00:00,1262.902775 +2017-04-25 00:00:00,1279.4146875000001 +2017-04-26 00:00:00,1309.109875 +2017-04-27 00:00:00,1345.3539125 +2017-04-28 00:00:00,1331.2944285714286 +2017-04-29 00:00:00,1334.9790375 +2017-04-30 00:00:00,1353.0045 
+2017-05-01 00:00:00,1417.1728125 +2017-05-02 00:00:00,1452.0762875 +2017-05-03 00:00:00,1507.5768571428573 +2017-05-04 00:00:00,1508.292125 +2017-05-05 00:00:00,1533.3350714285714 +2017-05-06 00:00:00,1560.4102 +2017-05-07 00:00:00,1535.8684285714285 +2017-05-08 00:00:00,1640.619225 +2017-05-09 00:00:00,1721.2849714285715 +2017-05-10 00:00:00,1762.88625 +2017-05-11 00:00:00,1820.9905625 +2017-05-12 00:00:00,1720.4785 +2017-05-13 00:00:00,1771.9200125 +2017-05-14 00:00:00,1776.3165 +2017-05-15 00:00:00,1723.1269375 +2017-05-16 00:00:00,1739.031975 +2017-05-17 00:00:00,1807.4850625 +2017-05-18 00:00:00,1899.0828875 +2017-05-19 00:00:00,1961.5204875 +2017-05-20 00:00:00,2052.9097875 +2017-05-21 00:00:00,2046.5344625 +2017-05-22 00:00:00,2090.6623125 +2017-05-23 00:00:00,2287.7102875 +2017-05-24 00:00:00,2379.1938333333333 +2017-05-25 00:00:00,2387.2062857142855 +2017-05-26 00:00:00,2211.976857142857 +2017-05-27 00:00:00,2014.0529625 +2017-05-28 00:00:00,2192.9808 +2017-05-29 00:00:00,2275.9307 +2017-05-30 00:00:00,2239.2053428571426 +2017-05-31 00:00:00,2285.9339142857143 +2017-06-01 00:00:00,2399.2426714285716 +2017-06-02 00:00:00,2446.142414285714 +2017-06-03 00:00:00,2525.7651584699997 +2017-06-04 00:00:00,2516.173142857143 +2017-06-05 00:00:00,2698.3138125 +2017-06-06 00:00:00,2883.3136966371426 +2017-06-07 00:00:00,2664.9208625 +2017-06-08 00:00:00,2792.9991875 +2017-06-09 00:00:00,2827.4913 +2017-06-10 00:00:00,2845.3728571428574 +2017-06-11 00:00:00,2961.8296124999997 +2017-06-12 00:00:00,2657.6750625 +2017-06-13 00:00:00,2748.185085714286 +2017-06-14 00:00:00,2447.0415625 +2017-06-15 00:00:00,2442.48025 +2017-06-16 00:00:00,2464.9598142857144 +2017-06-17 00:00:00,2665.927 +2017-06-18 00:00:00,2507.389252144286 +2017-06-19 00:00:00,2617.2102625 +2017-06-20 00:00:00,2754.97825 +2017-06-21 00:00:00,2671.04325 +2017-06-22 00:00:00,2727.2880125 +2017-06-23 00:00:00,2710.4122857142856 +2017-06-24 00:00:00,2589.1648875 +2017-06-25 00:00:00,2512.3662857142854 
+2017-06-26 00:00:00,2436.4510571428573 +2017-06-27 00:00:00,2517.9031142857143 +2017-06-28 00:00:00,2585.349185714286 +2017-06-29 00:00:00,2544.414475 +2017-06-30 00:00:00,2477.641375 +2017-07-01 00:00:00,2434.0778625 +2017-07-02 00:00:00,2501.191342857143 +2017-07-03 00:00:00,2561.225428571429 +2017-07-04 00:00:00,2599.7298375 +2017-07-05 00:00:00,2619.1875030042856 +2017-07-06 00:00:00,2609.96775 +2017-07-07 00:00:00,2491.201214285714 +2017-07-08 00:00:00,2562.1306624999997 +2017-07-09 00:00:00,2536.2389375 +2017-07-10 00:00:00,2366.1701428571428 +2017-07-11 00:00:00,2369.8621285714285 +2017-07-12 00:00:00,2385.7485714285717 +2017-07-13 00:00:00,2354.7834166666667 +2017-07-14 00:00:00,2190.947833333333 +2017-07-15 00:00:00,2058.9955999999997 +2017-07-16 00:00:00,1931.2143 +2017-07-17 00:00:00,2176.6234875 +2017-07-18 00:00:00,2320.12225 +2017-07-19 00:00:00,2264.7657 +2017-07-20 00:00:00,2898.1884166666664 +2017-07-21 00:00:00,2682.1953625 +2017-07-22 00:00:00,2807.609857142857 +2017-07-23 00:00:00,2725.549716666667 +2017-07-24 00:00:00,2751.821028571429 +2017-07-25 00:00:00,2560.9979166666667 +2017-07-26 00:00:00,2495.028585714286 +2017-07-27 00:00:00,2647.625 +2017-07-28 00:00:00,2781.636583333333 +2017-07-29 00:00:00,2722.512785714286 +2017-07-30 00:00:00,2745.955416666666 +2017-07-31 00:00:00,2866.431666666667 +2017-08-01 00:00:00,2710.4130666666665 +2017-08-02 00:00:00,2693.6339833333336 +2017-08-03 00:00:00,2794.117716666666 +2017-08-04 00:00:00,2873.8510833333335 +2017-08-05 00:00:00,3218.1150166666666 +2017-08-06 00:00:00,3252.5625333333332 +2017-08-07 00:00:00,3407.2268333333336 +2017-08-08 00:00:00,3457.374333333333 +2017-08-09 00:00:00,3357.326316666667 +2017-08-10 00:00:00,3424.4042000000004 +2017-08-11 00:00:00,3632.5066666666667 +2017-08-12 00:00:00,3852.8029142857145 +2017-08-13 00:00:00,4125.54802 +2017-08-14 00:00:00,4282.992 +2017-08-15 00:00:00,4217.028328571429 +2017-08-16 00:00:00,4360.876871428572 +2017-08-17 00:00:00,4328.725716666667 
+2017-08-18 00:00:00,4130.440066666667 +2017-08-19 00:00:00,4222.662214285714 +2017-08-20 00:00:00,4157.958033333333 +2017-08-21 00:00:00,4043.722 +2017-08-22 00:00:00,4082.180983333333 +2017-08-23 00:00:00,4174.95 +2017-08-24 00:00:00,4340.316716666667 +2017-08-25 00:00:00,4363.05445 +2017-08-26 00:00:00,4360.5133166666665 +2017-08-27 00:00:00,4354.308333333333 +2017-08-28 00:00:00,4391.673516666667 +2017-08-29 00:00:00,4607.98545 +2017-08-30 00:00:00,4594.98785 +2017-08-31 00:00:00,4748.255 +2017-09-01 00:00:00,4911.740016666667 +2017-09-02 00:00:00,4580.387479999999 +2017-09-03 00:00:00,4648.159983333334 +2017-09-04 00:00:00,4344.0983166666665 +2017-09-05 00:00:00,4488.72014 +2017-09-06 00:00:00,4641.822016666666 +2017-09-07 00:00:00,4654.6585000000005 +2017-09-08 00:00:00,4310.750183333334 +2017-09-09 00:00:00,4375.55952 +2017-09-10 00:00:00,4329.955 +2017-09-11 00:00:00,4248.090016666666 +2017-09-12 00:00:00,4219.036616666667 +2017-09-13 00:00:00,3961.2712666666666 +2017-09-14 00:00:00,3319.6299999999997 +2017-09-15 00:00:00,3774.2652833333336 +2017-09-16 00:00:00,3763.62604 +2017-09-17 00:00:00,3746.060783333333 +2017-09-18 00:00:00,4093.316666666667 +2017-09-19 00:00:00,3943.4133333333334 +2017-09-20 00:00:00,3977.5616666666665 +2017-09-21 00:00:00,3658.8981833333332 +2017-09-22 00:00:00,3637.5025499999997 +2017-09-23 00:00:00,3776.3869 +2017-09-24 00:00:00,3703.0406500000004 +2017-09-25 00:00:00,3942.5550000000003 +2017-09-26 00:00:00,3910.3073833333333 +2017-09-27 00:00:00,4202.554983333333 +2017-09-28 00:00:00,4201.98905 +2017-09-29 00:00:00,4193.574666666666 +2017-09-30 00:00:00,4335.368316666667 +2017-10-01 00:00:00,4360.722966666667 +2017-10-02 00:00:00,4386.88375 +2017-10-03 00:00:00,4293.3066 +2017-10-04 00:00:00,4225.175 +2017-10-05 00:00:00,4338.852 +2017-10-06 00:00:00,4345.6033333333335 +2017-10-07 00:00:00,4376.191666666667 +2017-10-08 00:00:00,4602.280883333334 +2017-10-09 00:00:00,4777.967816666666 +2017-10-10 00:00:00,4782.28 +2017-10-11 
00:00:00,4819.485766666667 +2017-10-12 00:00:00,5325.130683333333 +2017-10-13 00:00:00,5563.806566666666 +2017-10-14 00:00:00,5739.438733333333 +2017-10-15 00:00:00,5647.311666666667 +2017-10-16 00:00:00,5711.205866666667 +2017-10-17 00:00:00,5603.71294 +2017-10-18 00:00:00,5546.176100000001 +2017-10-19 00:00:00,5727.6335 +2017-10-20 00:00:00,5979.45984 +2017-10-21 00:00:00,6020.371683333334 +2017-10-22 00:00:00,5983.184550000001 +2017-10-23 00:00:00,5876.079866666667 +2017-10-24 00:00:00,5505.827766666666 +2017-10-25 00:00:00,5669.622533333334 +2017-10-26 00:00:00,5893.138416666666 +2017-10-27 00:00:00,5772.504983333333 +2017-10-28 00:00:00,5776.6969500000005 +2017-10-29 00:00:00,6155.43402 +2017-10-30 00:00:00,6105.87422 +2017-10-31 00:00:00,6388.645166666666 +2017-11-01 00:00:00,6665.306683333333 +2017-11-02 00:00:00,7068.020100000001 +2017-11-03 00:00:00,7197.72006 +2017-11-04 00:00:00,7437.543316666666 +2017-11-05 00:00:00,7377.012366666667 +2017-11-06 00:00:00,6989.071666666667 +2017-11-07 00:00:00,7092.127233333333 +2017-11-08 00:00:00,7415.878250000001 +2017-11-09 00:00:00,7158.03706 +2017-11-10 00:00:00,6719.39785 +2017-11-11 00:00:00,6362.851033333333 +2017-11-12 00:00:00,5716.301583333334 +2017-11-13 00:00:00,6550.227533333334 +2017-11-14 00:00:00,6635.412633333333 +2017-11-15 00:00:00,7301.42992 +2017-11-16 00:00:00,7815.0307 +2017-11-17 00:00:00,7786.884366666666 +2017-11-18 00:00:00,7817.1403833333325 +2017-11-19 00:00:00,8007.654066666667 +2017-11-20 00:00:00,8255.596816666666 +2017-11-21 00:00:00,8059.8 +2017-11-22 00:00:00,8268.035 +2017-11-23 00:00:00,8148.95 +2017-11-24 00:00:00,8250.978333333334 +2017-11-25 00:00:00,8707.407266666667 +2017-11-26 00:00:00,9284.1438 +2017-11-27 00:00:00,9718.29505 +2017-11-28 00:00:00,9952.50882 +2017-11-29 00:00:00,9879.328333333333 +2017-11-30 00:00:00,10147.372 +2017-12-01 00:00:00,10883.912 +2017-12-02 00:00:00,11071.368333333332 +2017-12-03 00:00:00,11332.622 +2017-12-04 00:00:00,11584.83 +2017-12-05 
00:00:00,11878.433333333334 +2017-12-06 00:00:00,13540.980000000001 +2017-12-07 00:00:00,16501.971666666668 +2017-12-08 00:00:00,16007.436666666666 +2017-12-09 00:00:00,15142.834152123332 +2017-12-10 00:00:00,14869.805 +2017-12-11 00:00:00,16762.116666666665 +2017-12-12 00:00:00,17276.393333333333 +2017-12-13 00:00:00,16808.366666666665 +2017-12-14 00:00:00,16678.892 +2017-12-15 00:00:00,17771.899999999998 +2017-12-16 00:00:00,19498.683333333334 +2017-12-17 00:00:00,19289.785 +2017-12-18 00:00:00,18961.856666666667 +2017-12-19 00:00:00,17737.111666666668 +2017-12-20 00:00:00,16026.271666666667 +2017-12-21 00:00:00,16047.51 +2017-12-22 00:00:00,15190.945 +2017-12-23 00:00:00,15360.261666666667 +2017-12-24 00:00:00,13949.175000000001 +2017-12-25 00:00:00,14119.028333333334 +2017-12-26 00:00:00,15999.048333333332 +2017-12-27 00:00:00,15589.321666666665 +2017-12-28 00:00:00,14380.581666666667 +2017-12-29 00:00:00,14640.14 +2017-12-30 00:00:00,13215.573999999999 +2017-12-31 00:00:00,14165.574999999999 +2018-01-01 00:00:00,13812.186666666666 +2018-01-02 00:00:00,15005.856666666667 +2018-01-03 00:00:00,15053.261666666665 +2018-01-04 00:00:00,15199.355000000001 +2018-01-05 00:00:00,17174.12 +2018-01-06 00:00:00,17319.198 +2018-01-07 00:00:00,16651.471666666668 +2018-01-08 00:00:00,15265.906666666668 +2018-01-09 00:00:00,14714.253333333334 +2018-01-10 00:00:00,15126.398333333333 +2018-01-11 00:00:00,13296.794 +2018-01-12 00:00:00,13912.882000000001 +2018-01-13 00:00:00,14499.773333333333 +2018-01-14 00:00:00,13852.92 +2018-01-15 00:00:00,14012.196 +2018-01-16 00:00:00,11180.998333333331 +2018-01-17 00:00:00,11116.946666666669 +2018-01-18 00:00:00,11345.423333333332 +2018-01-19 00:00:00,11422.44 +2018-01-20 00:00:00,12950.793333333333 +2018-01-21 00:00:00,11505.228 +2018-01-22 00:00:00,10544.593333333332 +2018-01-23 00:00:00,11223.064 +2018-01-24 00:00:00,11282.258333333333 +2018-01-25 00:00:00,11214.44 +2018-01-26 00:00:00,10969.815 +2018-01-27 00:00:00,11524.776666666667 
+2018-01-28 00:00:00,11765.71 +2018-01-29 00:00:00,11212.654999999999 +2018-01-30 00:00:00,10184.061666666666 +2018-01-31 00:00:00,10125.013333333334 +2018-02-01 00:00:00,9083.258333333333 +2018-02-02 00:00:00,8901.901666666667 +2018-02-03 00:00:00,9076.678333333333 +2018-02-04 00:00:00,8400.648333333333 +2018-02-05 00:00:00,6838.816666666667 +2018-02-06 00:00:00,7685.633333333334 +2018-02-07 00:00:00,8099.958333333333 +2018-02-08 00:00:00,8240.536666666667 +2018-02-09 00:00:00,8535.516666666668 +2018-02-10 00:00:00,8319.876566184 +2018-02-11 00:00:00,8343.455 +2018-02-12 00:00:00,8811.343333333332 +2018-02-13 00:00:00,8597.7675 +2018-02-14 00:00:00,9334.633333333333 +2018-02-15 00:00:00,9977.154 +2018-02-16 00:00:00,10127.161666666667 +2018-02-17 00:00:00,10841.991666666667 +2018-02-18 00:00:00,10503.298333333334 +2018-02-19 00:00:00,11110.964999999998 +2018-02-20 00:00:00,11390.391666666668 +2018-02-21 00:00:00,10532.791666666666 +2018-02-22 00:00:00,9931.071666666667 +2018-02-23 00:00:00,10162.116666666667 +2018-02-24 00:00:00,9697.956 +2018-02-25 00:00:00,9696.593333333332 +2018-02-26 00:00:00,10348.603333333334 +2018-02-27 00:00:00,10763.883333333333 +2018-02-28 00:00:00,10370.164999999999 +2018-03-01 00:00:00,11009.381666666668 +2018-03-02 00:00:00,11055.815 +2018-03-03 00:00:00,11326.948333333334 +2018-03-04 00:00:00,11430.181666666665 +2018-03-05 00:00:00,11595.54 +2018-03-06 00:00:00,10763.198333333334 +2018-03-07 00:00:00,10118.058 +2018-03-08 00:00:00,9429.111666666666 +2018-03-09 00:00:00,9089.278333333334 +2018-03-10 00:00:00,8746.002 +2018-03-11 00:00:00,9761.396666666666 +2018-03-12 00:00:00,9182.843333333332 +2018-03-13 00:00:00,9154.699999999999 +2018-03-14 00:00:00,8151.531666666667 +2018-03-15 00:00:00,8358.121666666666 +2018-03-16 00:00:00,8530.402 +2018-03-17 00:00:00,7993.674643641666 +2018-03-18 00:00:00,8171.415 +2018-03-19 00:00:00,8412.033333333333 +2018-03-20 00:00:00,8986.948333333334 +2018-03-21 00:00:00,8947.753333333334 +2018-03-22 
00:00:00,8690.408333333333 +2018-03-23 00:00:00,8686.826666666666 +2018-03-24 00:00:00,8662.378333333334 +2018-03-25 00:00:00,8617.296666666667 +2018-03-26 00:00:00,8197.548333333334 +2018-03-27 00:00:00,7876.195 +2018-03-28 00:00:00,7960.38 +2018-03-29 00:00:00,7172.28 +2018-03-30 00:00:00,6882.531666666667 +2018-03-31 00:00:00,6935.48 +2018-04-01 00:00:00,6794.105 diff --git a/pandas/Lecture.ipynb b/pandas/Lecture.ipynb new file mode 100644 index 0000000..9c4f728 --- /dev/null +++ b/pandas/Lecture.ipynb @@ -0,0 +1,1085 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![rmotr](https://user-images.githubusercontent.com/7065401/52071918-bda15380-2562-11e9-828c-7f95297e4a82.png)\n", + "
\n", + "\n", + "\n", + "\n", + "# What is Pandas?\n", + "\n", + "`pandas` is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.\n", + "\n", + "**The pandas package is probably the most important tool for Data Scientists and Analysts working with Python today**. The powerful machine learning and glamorous visualization tools may get all the attention, but pandas is the backbone of most data-related projects.\n", + "\n", + "> **Fun fact 🎁**: `pandas` is derived from the term \"panel data\", an econometrics term for data sets that include observations over multiple time periods for the same individuals. — Wikipedia\n", + "\n", + "pandas' popularity has **grown exponentially** in recent years. Here's an image from The Atlas showing the popularity of data science tools on Stack Overflow, where we see that pandas has become the dominant tool used by Python data scientists.\n", + "\n", + "
\n", + " \n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)\n", + "\n", + "## What is pandas used for?\n", + "\n", + "If you're thinking about data science as a career, then it is imperative that one of the first things you do is learn pandas.\n", + "\n", + "> **This tool will help you get, clean, transform and analyze your data**.\n", + "\n", + "For example, say you want to explore a dataset stored in a CSV on your computer. The first step is to use pandas to extract the data from that CSV into a DataFrame (a table-like data structure, we'll see more about it later). Then we proceed with the routine data analysis tasks:\n", + "\n", + "- Quick Exploratory Data Analysis (EDA);\n", + "- Calculate statistics such as average, median, max, or min of each column;\n", + "- Creating visualizations. Plot bars, lines, histograms, bubbles, and more;\n", + "- Cleaning the data by doing things like removing missing values and filtering rows or columns by some criteria;\n", + "- Building machine learning models to create predictions or classifications\n", + "- Store the cleaned, transformed data back into a CSV, other file or a database;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)\n", + "\n", + "## Why not just use Excel?\n", + "\n", + "Excel is one of the most popular and widely-used data tools; it's hard to find an organization that doesn't work with it in some way. From analysts, to sales VPs, to CEOs, professionals use Excel for both quick stats and accounting and serious data crunching.\n", + "\n", + "> Using pandas with Microsoft Excel can give you the best of both worlds and optimize your workflow.\n", + "\n", + "Pandas works with data stored in Python to manipulate and analyze data. 
As opposed to Excel, Python is completely **free to download and use**.\n", + "\n", + "Pandas operates right on the back of Python. As a result, it is **extremely fast and efficient** by using useful methods that **allow automating data processing tasks better than what Excel does**, including processing Excel files.\n", + "\n", + "In Excel, once you exceed 50K rows, it starts to slow down considerably. Pandas, on the other hand, **has no real limit and handles millions of data points seamlessly**. In terms of pure space, Excel caps a single spreadsheet at 1,048,576 rows exactly. At that point, your calculations would take forever to compute. More likely, Excel would just crash. A million rows may seem like a lot of data, but for data scientists, this is but a drop in the bucket.\n", + "\n", + "Pandas, however, has no limit on the number of data points you can have in a `DataFrame` (its version of a data set). It’s limited only by the amount of memory (RAM) of the computer it is running on.\n", + "\n", + "It is also **easier to create and use complex equations and calculations on your data**. You can apply hundreds of computations to millions of data points instantly with pandas. Since Python is open source, there are already hundreds of libraries created that could streamline the length of time it takes to calculate." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## Hands on!\n", + "\n", + "We'll just import pandas and other useful libraries such as numpy, matplotlib and seaborn to work with.\n", + "\n", + "Note that to import pandas and numpy we use the aliases `pd` and `np`. This is just a convention, which means it's not strictly necessary, but it is recommended." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdAAAABICAYAAABGH6SaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAACWUlEQVR4nO3bvWoUYRiG4XdUgiSNhRGRgCDYBAUhgofhWSyCpZWFlWfhCQjaeQb2WgREEPypBFGJaCoFGQtthJBlH/Lx7QzX1S07xfMWyw0TMozjWADAak71HgAAUySgABAQUAAICCgABAQUAAICCgCBM8seGIZhUVWLqqphY3Pv7PaV5qN62T382HtCW5d6D2jr9emN3hOa2T342XtCUy/PXew9oam9b796T2jqYGtpSibry+dPdfjj+3DUd8Mq/we6uXN9vHr32YkNWzf7z+/3ntDU+LD3grZubO30ntDM/pN3vSc0Ndye92/v99MPvSc09fjWhd4Tmnlw7069f/vmyIB6hQsAAQEFgICAAkBAQAEgIKAAEBBQAAgIKAAEBBQAAgIKAAEBBYCAgAJAQEABICCgABAQUAAICCgABAQUAAICCgABAQWAgIACQEBAASAgoAAQEFAACAgoAAQEFAACAgoAAQEFgICAAkBAQAEgIKAAEBBQAAgIKAAEBBQAAgIKAAEBBYCAgAJAQEABICCgABAQUAAICCgABAQUAAICCgABAQWAgIACQEBAASAgoAAQEFAACAgoAAQEFAACAgoAAQEFgICAAkBAQAEgMIzjePwDw7CoqsW/j9eq6lXrUR2dr6qvvUc0Mufbqtw3de6brjnfVlV1eRzH7aO+WBrQ/x4ehhfjON48sVlrZs73zfm2KvdNnfuma863LeMVLgAEBBQAAqsG9FGTFetjzvfN+bYq902d+6Zrzrcda6W/gQIAf3mFCwABAQWAgIACQEBAASAgoAAQ+AOXfmFPuMDVZwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from utils import apply_theme\n", + "\n", + "%matplotlib inline\n", + "apply_theme()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## NumPy and pandas\n", + "\n", + "**Pandas is built on top of the NumPy package**, which means that all the efficient structures and functions we saw about numpy in previous lessons, will also apply to pandas.\n", + "\n", + "While pandas adopts many coding idioms from NumPy, the biggest difference is that pandas is designed to work with tabular or heterogeneous data. NumPy, by contrast, is best suited for working with homogeneous numerical (possibly multidimensional) arrays." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overview Data Structures - Series and Dataframe\n", + "\n", + "To get started with pandas, you will need to get comfortable with its two main data structures: `Series` and `DataFrame`s.\n", + "\n", + "A `Series` is essentially used for column-data, and a `DataFrame` is a multi-dimensional table made up of a collection of `Series`. 
Pandas relies on NumPy arrays to store this data, which means it also uses its data types.\n", + "\n", + "\n", + "\n", + "`DataFrame`s and `Series` are quite similar in that many operations that you can do with one you can do with the other, such as filling in null values and calculating the mean.\n", + "\n", + "Let's define some data within Python lists:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "names = ['Avery Bradley', 'John Holland', 'Jonas Jerebko',\n", + " 'Jordan Mickey', 'Terry Rozier', 'Jared Sullinger', 'Evan Turner']\n", + "\n", + "teams = ['Boston Celtics', 'Boston Celtics', 'Boston Celtics',\n", + " 'Boston Celtics', 'Boston Celtics', 'Boston Celtics', 'Boston Celtics']\n", + "\n", + "numbers = [0, 30, 8, np.nan, 12, 7, 11]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Avery Bradley',\n", + " 'John Holland',\n", + " 'Jonas Jerebko',\n", + " 'Jordan Mickey',\n", + " 'Terry Rozier',\n", + " 'Jared Sullinger',\n", + " 'Evan Turner']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "names" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Boston Celtics',\n", + " 'Boston Celtics',\n", + " 'Boston Celtics',\n", + " 'Boston Celtics',\n", + " 'Boston Celtics',\n", + " 'Boston Celtics',\n", + " 'Boston Celtics']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "teams" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 30, 8, nan, 12, 7, 11]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "numbers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "### Series creation" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name
0Avery Bradley
1John Holland
2Jonas Jerebko
3Jordan Mickey
4Terry Rozier
5Jared Sullinger
6Evan Turner
\n", + "
" + ], + "text/plain": [ + " Name\n", + "0 Avery Bradley\n", + "1 John Holland\n", + "2 Jonas Jerebko\n", + "3 Jordan Mickey\n", + "4 Terry Rozier\n", + "5 Jared Sullinger\n", + "6 Evan Turner" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_series = pd.Series(names, name='Name')\n", + "\n", + "my_series.to_frame()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each value can be accessed using just its key/index position on Series:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Jordan Mickey'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_series[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Jordan Mickey'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_series.loc[3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### DataFrame creation\n", + "\n", + "There are many ways to create a `DataFrame` from scratch, but a great option is to just use a simple `dict`." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "data = {\n", + " 'Name': names,\n", + " 'Team': teams,\n", + " 'Number': numbers\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameTeamNumber
0Avery BradleyBoston Celtics0.000
1John HollandBoston Celtics30.000
2Jonas JerebkoBoston Celtics8.000
3Jordan MickeyBoston Celticsnan
4Terry RozierBoston Celtics12.000
5Jared SullingerBoston Celtics7.000
6Evan TurnerBoston Celtics11.000
\n", + "
" + ], + "text/plain": [ + " Name Team Number\n", + "0 Avery Bradley Boston Celtics 0.000\n", + "1 John Holland Boston Celtics 30.000\n", + "2 Jonas Jerebko Boston Celtics 8.000\n", + "3 Jordan Mickey Boston Celtics nan\n", + "4 Terry Rozier Boston Celtics 12.000\n", + "5 Jared Sullinger Boston Celtics 7.000\n", + "6 Evan Turner Boston Celtics 11.000" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_df = pd.DataFrame(data)\n", + "\n", + "my_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each value can be accessed using its key/index position and value position on DataFrames:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 Avery Bradley\n", + "1 John Holland\n", + "2 Jonas Jerebko\n", + "3 Jordan Mickey\n", + "4 Terry Rozier\n", + "5 Jared Sullinger\n", + "6 Evan Turner\n", + "Name: Name, dtype: object" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_df['Name']" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Jordan Mickey'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_df['Name'][3]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Jordan Mickey'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_df.loc[3, 'Name']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> In future lectures we'll see more on locating and extracting data from the DataFrame; don't worry if you don't get it right now.\n", + "\n", + "Let's move on to some quick methods for creating DataFrames from various other sources." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## Reading external data\n", + "\n", + "pandas allows us to read different types of external data files such as CSV, TXT and XLS.\n", + "\n", + "With CSV files all you need is a single line to load in the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TimestampOpenHighLowCloseVolume (BTC)Volume (Currency)Weighted Price
01/1/17 0:00966.3401,005.000960.530997.7506,850.5906,764,742.060987.470
11/2/17 0:00997.7501,032.000990.0101,012.5408,167.3808,273,576.9901,013.000
21/3/17 0:001,011.4401,039.000999.9901,035.2409,089.6609,276,500.3101,020.560
31/4/17 0:001,035.5101,139.8901,028.5601,114.92021,562.46023,469,644.9601,088.450
41/5/17 0:001,114.3801,136.720885.4101,004.74036,018.86036,211,399.5301,005.350
\n", + "
" + ], + "text/plain": [ + " Timestamp Open High Low Close Volume (BTC) Volume (Currency) Weighted Price\n", + "0 1/1/17 0:00 966.340 1,005.000 960.530 997.750 6,850.590 6,764,742.060 987.470\n", + "1 1/2/17 0:00 997.750 1,032.000 990.010 1,012.540 8,167.380 8,273,576.990 1,013.000\n", + "2 1/3/17 0:00 1,011.440 1,039.000 999.990 1,035.240 9,089.660 9,276,500.310 1,020.560\n", + "3 1/4/17 0:00 1,035.510 1,139.890 1,028.560 1,114.920 21,562.460 23,469,644.960 1,088.450\n", + "4 1/5/17 0:00 1,114.380 1,136.720 885.410 1,004.740 36,018.860 36,211,399.530 1,005.350" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('../data/bitcoin_data.csv')\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Also, there are many options when loading data, for example CSVs don't have indexes like our DataFrames, so we'll designate the `index_col` when reading:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowClose
Timestamp
2017-01-01966.3401,005.000960.530997.750
2017-01-02997.7501,032.000990.0101,012.540
2017-01-031,011.4401,039.000999.9901,035.240
2017-01-041,035.5101,139.8901,028.5601,114.920
2017-01-051,114.3801,136.720885.4101,004.740
\n", + "
" + ], + "text/plain": [ + " Open High Low Close\n", + "Timestamp \n", + "2017-01-01 966.340 1,005.000 960.530 997.750\n", + "2017-01-02 997.750 1,032.000 990.010 1,012.540\n", + "2017-01-03 1,011.440 1,039.000 999.990 1,035.240\n", + "2017-01-04 1,035.510 1,139.890 1,028.560 1,114.920\n", + "2017-01-05 1,114.380 1,136.720 885.410 1,004.740" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\n", + " '../data/bitcoin_data.csv',\n", + " index_col=0,\n", + " parse_dates=True\n", + ").loc[:, 'Open':'Close']\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Bitcoin price (USD)')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "dark" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(16, 6))\n", + "\n", + "df.plot(ax=ax)\n", + "\n", + "plt.title(\"Bitcoin price (USD)\", fontsize=16, fontweight='bold', color='white')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## Plotting example: Bollinger bands\n", + "\n", + "As a sneak peek of what we'll see in upcoming lectures, let's make some basic plots using _pandas_.\n", + "\n", + "Bollinger Bands are a technical trading tool created by John Bollinger in the early 1980s. They arose from the need for adaptive trading bands and the observation that volatility was dynamic, not static as was widely believed at the time." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calculate Bollinger bands\n", + "\n", + "To demonstrate the strategy we will use a 30-period rolling mean window, and 1.5 standard deviations for each of the bands. This might not be the optimal configuration for this dataset, but we will talk more about optimizing these two arguments later." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# set number of days and standard deviations to use for rolling \n", + "# lookback period for Bollinger band calculation\n", + "window = 30\n", + "no_of_std = 1.5\n", + "\n", + "# calculate rolling mean and standard deviation\n", + "rolling_mean = df['Close'].rolling(window).mean()\n", + "rolling_std = df['Close'].rolling(window).std()\n", + "\n", + "# create two new DataFrame columns to hold values of upper and lower Bollinger bands\n", + "df['Rolling Mean'] = rolling_mean\n", + "df['Bollinger High'] = rolling_mean + (rolling_std * no_of_std)\n", + "df['Bollinger Low'] = rolling_mean - (rolling_std * no_of_std)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseRolling MeanBollinger HighBollinger Low
Timestamp
2018-03-248,917.9909,020.0008,505.0008,547.0009,533.03011,150.4067,915.654
2018-03-258,541.9608,680.0008,368.6308,453.9009,475.95711,109.2297,842.684
2018-03-268,451.1208,500.0007,831.1508,149.6609,424.61211,096.2487,752.976
2018-03-278,152.2608,211.6207,742.1107,791.7009,364.66811,094.0487,635.287
2018-03-287,791.6908,104.9807,723.0308,039.8609,288.50611,032.6167,544.396
\n", + "
" + ], + "text/plain": [ + " Open High Low Close Rolling Mean Bollinger High Bollinger Low\n", + "Timestamp \n", + "2018-03-24 8,917.990 9,020.000 8,505.000 8,547.000 9,533.030 11,150.406 7,915.654\n", + "2018-03-25 8,541.960 8,680.000 8,368.630 8,453.900 9,475.957 11,109.229 7,842.684\n", + "2018-03-26 8,451.120 8,500.000 7,831.150 8,149.660 9,424.612 11,096.248 7,752.976\n", + "2018-03-27 8,152.260 8,211.620 7,742.110 7,791.700 9,364.668 11,094.048 7,635.287\n", + "2018-03-28 7,791.690 8,104.980 7,723.030 8,039.860 9,288.506 11,032.616 7,544.396" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Bitcoin - Bollinger bands (USD)')" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "dark" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(16, 6))\n", + "\n", + "df[['Close','Bollinger High','Bollinger Low']].plot(ax=ax)\n", + "\n", + "plt.title(\"Bitcoin - Bollinger bands (USD)\", fontsize=16, fontweight='bold', color='white')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> Check out the blog post we wrote about Bollinger bands here!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pandas/conditional-selection-and-operations.ipynb b/pandas/conditional-selection-and-operations.ipynb new file mode 100644 index 0000000..e1c5245 --- /dev/null +++ b/pandas/conditional-selection-and-operations.ipynb @@ -0,0 +1,1548 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![rmotr](https://user-images.githubusercontent.com/7065401/52071918-bda15380-2562-11e9-828c-7f95297e4a82.png)\n", + "
\n", + "\n", + "\n", + "\n", + "# Vectorized Operations and Methods on Pandas Series\n", + "\n", + "Series also support vectorized operations and aggregation functions, just like NumPy arrays. In this lecture we'll see the most common ones." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)\n", + "\n", + "## Hands on! " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "pd.options.display.float_format = '{:,.2f}'.format" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "The first thing we'll do is create again the `Series` from our previous lecture: " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "g7_pop = pd.Series({\n", + " 'Canada': 35.467,\n", + " 'France': 63.951,\n", + " 'Germany': 80.94,\n", + " 'Italy': 60.665,\n", + " 'Japan': 127.061,\n", + " 'United Kingdom': 64.511,\n", + " 'United States': 318.523\n", + "}, dtype=np.float, name='G7 Population in millions')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.47\n", + "France 63.95\n", + "Germany 80.94\n", + "Italy 60.66\n", + "Japan 127.06\n", + "United Kingdom 64.51\n", + "United States 318.52\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], 
+ "source": [ + "gdp = pd.Series(\n", + " [1785387, 2833687, 3874437, 2167744, 4602367, 2950039, 17348075],\n", + " index=['Canada', 'France', 'Germany', 'Italy',\n", + " 'Japan', 'United Kingdom', 'United States'],\n", + " dtype=np.float,\n", + " name='G7 GDP in millions')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 1,785,387.00\n", + "France 2,833,687.00\n", + "Germany 3,874,437.00\n", + "Italy 2,167,744.00\n", + "Japan 4,602,367.00\n", + "United Kingdom 2,950,039.00\n", + "United States 17,348,075.00\n", + "Name: G7 GDP in millions, dtype: float64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdp" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.47\n", + "France 63.95\n", + "Germany 80.94\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Japan 127.06\n", + "United Kingdom 64.51\n", + "United States 318.52\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.tail(3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## `Series` vectorized operations" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35,467,000.00\n", + "France 63,951,000.00\n", + "Germany 80,940,000.00\n", + "Italy 60,665,000.00\n", 
+ "Japan 127,061,000.00\n", + "United Kingdom 64,511,000.00\n", + "United States 318,523,000.00\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop * 1_000_000" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 1,000,035.47\n", + "France 1,000,063.95\n", + "Germany 1,000,080.94\n", + "Italy 1,000,060.67\n", + "Japan 1,000,127.06\n", + "United Kingdom 1,000,064.51\n", + "United States 1,000,318.52\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop + 1_000_000" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 1,785,387,000,000.00\n", + "France 2,833,687,000,000.00\n", + "Germany 3,874,437,000,000.00\n", + "Italy 2,167,744,000,000.00\n", + "Japan 4,602,367,000,000.00\n", + "United Kingdom 2,950,039,000,000.00\n", + "United States 17,348,075,000,000.00\n", + "Name: G7 GDP in millions, dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdp * 1_000_000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Operation between Series:**" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 50,339.39\n", + "France 44,310.28\n", + "Germany 47,868.01\n", + "Italy 35,733.03\n", + "Japan 36,221.71\n", + "United Kingdom 45,729.24\n", + "United States 54,464.12\n", + "dtype: float64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdp / g7_pop" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, 
+ "outputs": [ + { + "data": { + "text/plain": [ + "Canada 50,339.39\n", + "France 44,310.28\n", + "Germany 47,868.01\n", + "Italy 35,733.03\n", + "Japan 36,221.71\n", + "United Kingdom 45,729.24\n", + "United States 54,464.12\n", + "dtype: float64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(gdp * 1_000_000) / (g7_pop * 1_000_000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## Using _Universal Functions (Ufuncs)_ to obtain statistical info\n", + "\n", + "We can apply any _Universal Function_ to a Series.\n", + "\n", + "Another useful method is `describe`, which gives you a good \"summary\" of the `Series`. Let's explore other methods in more detail:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 7.00\n", + "mean 107.30\n", + "std 97.25\n", + "min 35.47\n", + "25% 62.31\n", + "50% 64.51\n", + "75% 104.00\n", + "max 318.52\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "318.523" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "35.467" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "107.30257142857144" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "97.24996987121581" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.std()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "61.3222" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.quantile(.2)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "117.83680000000004" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.quantile(.8)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 3.57\n", + "France 4.16\n", + "Germany 4.39\n", + "Italy 4.11\n", + "Japan 4.84\n", + "United Kingdom 4.17\n", + "United States 5.76\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.log(g7_pop)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![rmotr](https://user-images.githubusercontent.com/7065401/52071918-bda15380-2562-11e9-828c-7f95297e4a82.png)\n", + "
\n", + "\n", + "\n", + "\n", + "# Conditional Selection & Filtering on Pandas Series\n", + "\n", + "In conditional selection (also known as **boolean selection**), we will select subsets of data based on the actual values of the data in the Series by using a boolean vector to filter the data." + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)\n", + "\n", + "## Hands on!" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": 1, + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "The first thing we'll do is create again the `Series` from our previous lecture:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": 2, + "source": [ + "data_dic = {\n", + " 'Canada': 35.467,\n", + " 'France': 63.951,\n", + " 'Germany': 80.94,\n", + " 'Italy': 60.665,\n", + " 'Japan': 127.061,\n", + " 'United Kingdom': 64.511,\n", + " 'United States': 318.523\n", + "}\n", + "\n", + "g7_pop = pd.Series(data_dic,\n", + " name='G7 Population in millions')" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 3, + "source": "g7_pop" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Summary of selection (from previous lesson):" + }, + { + "metadata": {}, + "cell_type": 
"code", + "outputs": [ + { + "data": { + "text/plain": [ + "63.951" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 4, + "source": "g7_pop['France']" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "63.951" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 5, + "source": "g7_pop.loc['France']" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "35.467" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 6, + "source": "g7_pop.iloc[0]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## Conditional selection ( boolean arrays)\n", + "\n", + "The same boolean array techniques we saw applied to numpy arrays can be used for Pandas `Series`.\n", + "\n", + "On previous lecture we saw that we can index our `Series` using a list of boolean values:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 7, + "source": "g7_pop[[False, True, True, True, False, False, False]]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "More documented:" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + 
], + "execution_count": 8, + "source": [ + "g7_pop[[\n", + " False, # CA\n", + " True, # Fr\n", + " True, # GE\n", + " True, # IT\n", + " False, # JA\n", + " False, # UK\n", + " False #US\n", + "]]" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Now we'll go a step further and use a real condition to generate these list of boolean values:" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada False\n", + "France False\n", + "Germany True\n", + "Italy False\n", + "Japan True\n", + "United Kingdom False\n", + "United States True\n", + "Name: G7 Population in millions, dtype: bool" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 9, + "source": [ + "condition = g7_pop > 70\n", + "\n", + "condition" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Germany 80.940\n", + "Japan 127.061\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 10, + "source": "g7_pop[condition]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Germany 80.940\n", + "Japan 127.061\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 11, + "source": "g7_pop.loc[g7_pop > 70]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "107.30257142857144" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 12, + "source": "g7_pop.mean()" + }, + { + "metadata": { + "scrolled": true + }, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": 
[ + "Japan 127.061\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 13, + "source": "g7_pop[g7_pop > g7_pop.mean()]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Japan 127.061\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 14, + "source": "g7_pop.loc[g7_pop > g7_pop.mean()]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 15, + "source": "g7_pop.loc[g7_pop > g7_pop.mean()].size" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Operators" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "#### `or`" + }, + { + "metadata": { + "scrolled": true + }, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "Germany 80.940\n", + "Japan 127.061\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 16, + "source": "g7_pop[(g7_pop > 70) | (g7_pop < 40)]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "#### `and`" + }, + { + "metadata": { + "scrolled": true + }, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Germany 80.940\n", + "Japan 127.061\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 17, + "source": "g7_pop[(g7_pop > 80) & (g7_pop < 200)]" + }, + { + 
"metadata": {}, + "cell_type": "markdown", + "source": "#### `not`" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "France 63.951\n", + "Italy 60.665\n", + "United Kingdom 64.511\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 18, + "source": "g7_pop.loc[~(g7_pop > 80)]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Germany 80.940\n", + "Japan 127.061\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 19, + "source": "g7_pop.loc[g7_pop > 80]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Japan 127.061\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 20, + "source": "g7_pop[g7_pop > g7_pop.mean()]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "97.24996987121581" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 21, + "source": "g7_pop.std()" + }, + { + "metadata": { + "scrolled": true + }, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 22, + "source": "g7_pop[(g7_pop > g7_pop.mean() - g7_pop.std() / 2) | (g7_pop > 
g7_pop.mean() + g7_pop.std() / 2)]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Indexing with isin\n", + "\n", + "Consider the `isin()` method of `Series`, which returns a boolean vector that is true wherever the Series elements exist in the passed list. This allows you to select rows where one or more columns have values you want:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 23, + "source": "g7_pop" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "Germany 80.940\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 24, + "source": "g7_pop[g7_pop.isin([80, 80.940, 60.451, 35.467])]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "Italy 60.665\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 25, + "source": "g7_pop[g7_pop.index.isin(['Canada', 'Italy'])]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "### Modifying series using conditional selection" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 99.990\n", + "France 99.990\n", + "Germany 80.940\n", + 
"Italy 99.990\n", + "Japan 127.061\n", + "United Kingdom 99.990\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 26, + "source": [ + "g7_pop[g7_pop < 70] = 99.99\n", + "\n", + "g7_pop" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "Also we can combine `+=`, `-=`, `*=` operations while modifying values.\n", + "\n", + "Lets remove 5 million from countries with population >100M:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 99.990\n", + "France 99.990\n", + "Germany 80.940\n", + "Italy 99.990\n", + "Japan 132.061\n", + "United Kingdom 99.990\n", + "United States 323.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 27, + "source": [ + "g7_pop[g7_pop > 100] += 5\n", + "\n", + "g7_pop" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![rmotr](https://user-images.githubusercontent.com/7065401/52071918-bda15380-2562-11e9-828c-7f95297e4a82.png)\n", + "
\n", + "\n", + "\n", + "\n", + "# Pandas Series - Sorting\n", + "\n", + "In many use cases `Series` values need to be sorted.\n", + "\n", + "Sorting in Pandas is extremely easy. There are two important methods to be used for Series and DataFrames that will take care of the job: `sort_values` and `sort_index`." + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)\n", + "\n", + "## Hands on!" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": 1, + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": 2, + "source": "pd.options.display.float_format = '{:,.2f}'.format" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "The first thing we'll do is create again the `Series` from our previous lecture:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": 3, + "source": [ + "g7_pop = pd.Series({\n", + " 'Canada': 35.467,\n", + " 'France': 63.951,\n", + " 'Germany': 80.94,\n", + " 'Italy': 60.665,\n", + " 'Japan': 127.061,\n", + " 'United Kingdom': 64.511,\n", + " 'United States': 318.523\n", + "}, dtype=np.float, name='G7 Population in millions')" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.47\n", + "France 63.95\n", + "Germany 80.94\n", + "Italy 60.66\n", + "Japan 127.06\n", + "United Kingdom 64.51\n", + "United States 318.52\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 4, + "source": "g7_pop" + }, + { + 
"metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": 5, + "source": [ + "gdp = pd.Series(\n", + " [1785387, 2833687, 3874437, 2167744, 4602367, 2950039, 17348075],\n", + " index=['Canada', 'France', 'Germany', 'Italy',\n", + " 'Japan', 'United Kingdom', 'United States'],\n", + " dtype=np.float,\n", + " name='G7 GDP in millions')" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "### Sorting values" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.47\n", + "France 63.95\n", + "Germany 80.94\n", + "Italy 60.66\n", + "Japan 127.06\n", + "United Kingdom 64.51\n", + "United States 318.52\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 6, + "source": "g7_pop" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.47\n", + "Italy 60.66\n", + "France 63.95\n", + "United Kingdom 64.51\n", + "Germany 80.94\n", + "Japan 127.06\n", + "United States 318.52\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 7, + "source": "g7_pop.sort_values()" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "As you can see, sorting is as simple as invoking the `sort_values` method. By default, values are sorted in ascending order, which you can customize with the `ascending` parameter." 
+ }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "United States 318.52\n", + "Japan 127.06\n", + "Germany 80.94\n", + "United Kingdom 64.51\n", + "France 63.95\n", + "Italy 60.66\n", + "Canada 35.47\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 8, + "source": "g7_pop.sort_values(ascending=False)" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.47\n", + "France 63.95\n", + "Germany 80.94\n", + "Italy 60.66\n", + "Japan 127.06\n", + "United Kingdom 64.51\n", + "United States 318.52\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 9, + "source": "g7_pop" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": 10, + "source": "g7_pop.sort_values(ascending=False, inplace=True)" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "United States 318.52\n", + "Japan 127.06\n", + "Germany 80.94\n", + "United Kingdom 64.51\n", + "France 63.95\n", + "Italy 60.66\n", + "Canada 35.47\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 11, + "source": "g7_pop" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Sorting index\n", + "\n", + "`sort_index` works exactly in the same way:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.47\n", + "France 63.95\n", + "Germany 80.94\n", + "Italy 60.66\n", + "Japan 127.06\n", + "United Kingdom 64.51\n", + "United States 318.52\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + 
"execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 12, + "source": "g7_pop.sort_index()" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pandas/series.ipynb b/pandas/series.ipynb new file mode 100644 index 0000000..d3c399a --- /dev/null +++ b/pandas/series.ipynb @@ -0,0 +1,2235 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![rmotr](https://user-images.githubusercontent.com/7065401/52071918-bda15380-2562-11e9-828c-7f95297e4a82.png)\n", + "
\n", + "\n", + "\n", + "\n", + "# Intro to Pandas Series\n", + "\n", + "A Series is a one-dimensional array-like object containing a _typed_ sequence of values and an associated array of data labels, called its _index_." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)\n", + "\n", + "## Hands on!\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## Series creation\n", + "\n", + "The `pd.Series` constructor accepts the following parameters:\n", + "\n", + "- **data**: (required) has all the data we want to store in the Series and could be a scalar value, a Python sequence or a one-dimensional NumPy ndarray.\n", + "- **index**: (optional) has all the labels that we want to assign to our data values and could be a Python sequence or a one-dimensional NumPy ndarray. Default value: `np.arange(0, len(data))`.\n", + "- **dtype**: (optional) any NumPy data type."
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n", + "4 5\n", + "dtype: int64" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series = pd.Series([1, 2, 3, 4, 5])\n", + "series" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Series have an associated type:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n", + "4 5\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Show first values of our Series\n", + "series.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dtype('int64')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1.0\n", + "1 2.0\n", + "2 3.0\n", + "3 4.0\n", + "4 5.0\n", + "dtype: float64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series = pd.Series([1, 2, 3, 4, 5], dtype=np.float)\n", + "series" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dtype('float64')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 a\n", + "1 b\n", + "2 c\n", + "3 d\n", + "4 e\n", + "dtype: object" + ] + }, + "execution_count": 
7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series = pd.Series(['a', 'b', 'c', 'd', 'e'])\n", + "series" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2\n", + "1 4\n", + "2 6\n", + "3 8\n", + "4 10\n", + "dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Using a NumPy ndarray\n", + "array = np.array([2, 4, 6, 8, 10])\n", + "series = pd.Series(array)\n", + "series" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 1\n", + "b 2\n", + "c 3\n", + "d 4\n", + "e 5\n", + "dtype: int64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# With predefined index\n", + "series = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])\n", + "series" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 1.0\n", + "b 2.0\n", + "c 3.0\n", + "d 4.0\n", + "e 5.0\n", + "dtype: float64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Using a dictionary (index will be defined using keys)\n", + "series = pd.Series({'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}, dtype=np.float64)\n", + "series" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## Series attributes\n", + "\n", + "These are the most common attributes to get information about a `Series`:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 1.0\n", + "b 2.0\n", + "c 3.0\n", + "d 4.0\n", + "e
5.0\n", + "dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series = pd.Series(data=[1, 2, 3, 4, 5],\n", + " index=['a', 'b', 'c', 'd', 'e'],\n", + " dtype=np.float64)\n", + "series" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dtype('float64')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Type of our Series\n", + "series.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1., 2., 3., 4., 5.])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Values of a series\n", + "series.values" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.ndarray" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(series.values)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['a', 'b', 'c', 'd', 'e'], dtype='object')" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Index of a series\n", + "series.index" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Dimension of the Series\n", + "series.ndim" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5,)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type":
"execute_result" + } + ], + "source": [ + "# Shape of the Series\n", + "series.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Number of Series elements\n", + "series.size" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## The Group of Seven\n", + "\n", + "We'll start analyzing \"[The Group of Seven](https://en.wikipedia.org/wiki/Group_of_Seven)\", which is a political forum formed by Canada, France, Germany, Italy, Japan, the United Kingdom and the United States. We'll start by analyzing population, and for that, we'll use a `pandas.Series` object." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 35.467\n", + "1 63.951\n", + "2 80.940\n", + "3 60.665\n", + "4 127.061\n", + "5 64.511\n", + "6 318.523\n", + "dtype: float64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# In millions\n", + "g7_pop = pd.Series([35.467, 63.951, 80.940, 60.665, 127.061, 64.511, 318.523])\n", + "\n", + "g7_pop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Someone might not know we're representing population in millions of inhabitants.
Series can have a `name`, to better document the purpose of the Series:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 35.467\n", + "1 63.951\n", + "2 80.940\n", + "3 60.665\n", + "4 127.061\n", + "5 64.511\n", + "6 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.name = 'G7 Population in millions'\n", + "\n", + "g7_pop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Series are pretty similar to numpy arrays:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dtype('float64')" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.ndarray" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(series.values)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.ndim" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7,)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "7" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.size" + 
] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And they _look_ like simple Python lists or Numpy Arrays. But they're actually more similar to Python `dict`s." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 35.467\n", + "1 63.951\n", + "2 80.940\n", + "3 60.665\n", + "4 127.061\n", + "5 64.511\n", + "6 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=7, step=1)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Assigning `Series` indexes\n", + "\n", + "In contrast to lists, we can explicitly define the index:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "g7_pop.index = [\n", + " 'Canada',\n", + " 'France',\n", + " 'Germany',\n", + " 'Italy',\n", + " 'Japan',\n", + " 'United Kingdom',\n", + " 'United States',\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compare it with the [following 
table](https://docs.google.com/spreadsheets/d/1IlorV2-Oh9Da1JAZ7weVw86PQrQydSMp-ydVMH135iI/edit?usp=sharing): \n", + "\n", + "![image](https://user-images.githubusercontent.com/872296/38149656-b5ce9816-3431-11e8-88e4-195756e25355.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Removing indexes\n", + "\n", + "We can also remove current indexes from our `Series`, going back to the original indexes. To do that we use the `reset_index()` method with `drop=True` parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 35.467\n", + "1 63.951\n", + "2 80.940\n", + "3 60.665\n", + "4 127.061\n", + "5 64.511\n", + "6 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop.reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that `reset_index()` will return a new 
`Series`, so if we want to keep it we need to assign it to a variable, or use `inplace=True` parameter to modify the original `Series`." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "g7_pop.reset_index(drop=True, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 35.467\n", + "1 63.951\n", + "2 80.940\n", + "3 60.665\n", + "4 127.061\n", + "5 64.511\n", + "6 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating a `Series` with indexes already\n", + "\n", + "We can create a new `Series` with its indexes labels in a single step:" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "values = [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523]\n", + "indexes = ['Canada', 'France', 'Germany', 'Italy',\n", + " 'Japan', 'United Kingdom', 'United States']\n", + "\n", + "pd.Series(values,\n", + " index=indexes,\n", + " name='G7 Population in millions')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating a `Series` from a data dictionary\n", + "We can say that Series look like \"ordered dictionaries\". 
We can actually create Series out of dictionaries:" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "data_dic = {\n", + " 'Canada': 35.467,\n", + " 'France': 63.951,\n", + " 'Germany': 80.94,\n", + " 'Italy': 60.665,\n", + " 'Japan': 127.061,\n", + " 'United Kingdom': 64.511,\n", + " 'United States': 318.523\n", + "}\n", + "\n", + "g7_pop = pd.Series(data_dic,\n", + " name='G7 Population in millions')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g7_pop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating a `Series` out of other `Series`\n", + "\n", + "You can also create Series out of other series, specifying indexes:" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Spain NaN\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Series(g7_pop,\n", + " index=['France', 'Germany', 'Italy', 'Spain'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + 
"![rmotr](https://user-images.githubusercontent.com/7065401/52071918-bda15380-2562-11e9-828c-7f95297e4a82.png)\n", + "
\n", + "\n", + "\n", + "\n", + "# Pandas Series - Selection and Indexing\n", + "\n", + "Pandas Series object acts in many ways like a one-dimensional NumPy array, and in many ways like a standard Python dictionary. If we keep these two overlapping analogies in mind, it will help us to understand the patterns of data indexing and selection in these data structures." + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)\n", + "\n", + "## Hands on!" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": 1, + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "The first thing we'll do is create again the `Series` from our previous lecture:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": 2, + "source": [ + "data_dic = {\n", + " 'Canada': 35.467,\n", + " 'France': 63.951,\n", + " 'Germany': 80.94,\n", + " 'Italy': 60.665,\n", + " 'Japan': 127.061,\n", + " 'United Kingdom': 64.511,\n", + " 'United States': 318.523\n", + "}\n", + "\n", + "g7_pop = pd.Series(data_dic,\n", + " name='G7 Population in millions')" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 3, + "source": "g7_pop" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + 
"![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## Indexing\n", + "\n", + "Indexing works similarly to lists and dictionaries.\n", + "\n", + "### Indexing by index\n", + "\n", + "you use the **index** of the element you're looking for:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "35.467" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 4, + "source": "g7_pop['Canada']" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "127.061" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 5, + "source": "g7_pop['Japan']" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "64.511" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 6, + "source": "g7_pop['United Kingdom']" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "The following also works, but it's **NOT** recommended:" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "127.061" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 7, + "source": "g7_pop.Japan" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Slicing and multi-selection\n", + "\n", + "Slicing also works, but **important**, in Pandas, the upper limit is also included:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + 
} + ], + "execution_count": 8, + "source": "g7_pop['Germany': 'Japan']" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Multi indexing also works (similarly to numpy):" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Italy 60.665\n", + "France 63.951\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 9, + "source": "g7_pop[['Italy', 'France', 'United States']]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Indexing by sequential position\n", + "\n", + "Indexing elements by their sequential position also works. In this case pandas evaluates the object received; if it doesn't exist as an index, it'll try by sequential position.\n", + "\n", + "With sequential position the upper limit is not included." + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 10, + "source": "g7_pop" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "35.467" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 11, + "source": "g7_pop.iloc[0] # First element" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "318.523" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 12, + "source": "g7_pop.iloc[-1] # Last element" + }, + { + "metadata": {}, + 
"cell_type": "markdown", + "source": "Other examples:" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "80.94" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 13, + "source": "g7_pop.iloc[2]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "127.061" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 14, + "source": "g7_pop.iloc[4]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Germany 80.940\n", + "Italy 60.665\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 15, + "source": "g7_pop.iloc[2:4]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Italy 60.665\n", + "France 63.951\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 16, + "source": "g7_pop.iloc[[3, 1, 6]]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "### Adding new elements to a `Series`\n", + "\n", + "In many cases we'll want to add new values to our `Series`, to do that we can just simply index our `Series` using the new index and then assigning a value to that index. 
Let's add two new records:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": 17, + "source": [ + "g7_pop['Brazil'] = 20.124\n", + "g7_pop['India'] = 32.235" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 35.467\n", + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Brazil 20.124\n", + "India 32.235\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 18, + "source": "g7_pop" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "### Modifying `Series` elements" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 40.500\n", + "France 63.951\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Brazil 20.124\n", + "India 32.235\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 19, + "source": [ + "g7_pop['Canada'] = 40.5\n", + "\n", + "g7_pop" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 40.500\n", + "France NaN\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Brazil 20.124\n", + "India 32.235\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 20, + "source": [ + "g7_pop['France'] = 
np.nan\n", + "\n", + "g7_pop" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "### Removing elements from a `Series`" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 40.500\n", + "France NaN\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "India 32.235\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 21, + "source": [ + "del g7_pop['Brazil']\n", + "\n", + "g7_pop" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 40.500\n", + "France NaN\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 22, + "source": [ + "del g7_pop['India']\n", + "\n", + "g7_pop" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "### Checking existence of a key (membership)" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 23, + "source": "'France' in g7_pop" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type":
"execute_result" + } + ], + "execution_count": 24, + "source": "'Brazil' in g7_pop" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## Introducing **`loc`** & **`iloc`**\n", + "\n", + "What's the problem with the indexing we've seen? It's not explicit. Pandas receives an element to index and it tries figuring out if we meant to select an element by its key, or its sequential position. Check out the following example:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "1 a\n", + "2 b\n", + "3 c\n", + "dtype: object" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 25, + "source": [ + "s = pd.Series(\n", + " ['a', 'b', 'c'],\n", + " index=[1, 2, 3])\n", + "s" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "1 a\n", + "2 b\n", + "3 c\n", + "dtype: object" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 26, + "source": "s" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "What happens if we try indexing `s[1]`, what should it return? `a` or `b`?" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "'a'" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 27, + "source": "s[1]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "In this case, the returned object is worked out by the index, not by the sequential position. 
But again, it's not intuitive or explicit.\n", + "\n", + "Enter `loc` and `iloc`:\n", + "* `loc` is the preferred way to select elements in Series (and Dataframes) by their index\n", + "* `iloc` is the preferred way to select by sequential position" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "'a'" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 28, + "source": "s.loc[1]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "'b'" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 29, + "source": "s.iloc[1]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 40.500\n", + "France NaN\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 30, + "source": "g7_pop" + }, + { + "metadata": { + "scrolled": true + }, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "318.523" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 31, + "source": "g7_pop.iloc[-1]" + }, + { + "metadata": { + "scrolled": true + }, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 40.5\n", + "France NaN\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 32, + "source": "g7_pop.iloc[[0, 1]]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Using our previous series:" + }, + { + "metadata": {}, + "cell_type": "code", 
+ "outputs": [ + { + "data": { + "text/plain": [ + "Canada 40.500\n", + "France NaN\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 33, + "source": "g7_pop" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "127.061" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 34, + "source": "g7_pop.loc['Japan']" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "318.523" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 35, + "source": "g7_pop.iloc[-1]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 40.500\n", + "France NaN\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 36, + "source": "g7_pop" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "40.5" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 37, + "source": "g7_pop.loc['Canada']" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "40.5" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 38, + "source": "g7_pop.iloc[0]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ 
+ "318.523" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 39, + "source": "g7_pop.iloc[-1]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Japan 127.061\n", + "Canada 40.500\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 40, + "source": "g7_pop.loc[['Japan', 'Canada']]" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 40.500\n", + "United States 318.523\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 41, + "source": "g7_pop.iloc[[0, -1]]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "#### **`loc`** & **`iloc`** to modify `Series`" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 40.500\n", + "France NaN\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 1000.000\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 42, + "source": [ + "g7_pop.loc['United States'] = 1000\n", + "\n", + "g7_pop" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada 40.500\n", + "France NaN\n", + "Germany 80.940\n", + "Italy 60.665\n", + "Japan 127.061\n", + "United Kingdom 64.511\n", + "United States 500.000\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 43, + "source": [ + "g7_pop.iloc[-1] = 500\n", + "\n", + "g7_pop" + ] 
+ }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)\n", + "\n", + "## Introducing to Boolean arrays\n", + "\n", + "Another way to select certain values within a `Series` is using **boolean arrays**, also known as **Conditional selection**.\n", + "\n", + "We can index our `Series` using a list of boolean values:" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Germany 80.940\n", + "Japan 127.061\n", + "United States 500.000\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 44, + "source": "g7_pop[[False, False, True, False, True, False, True]]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Or we can index our `Series` using another `Series` with boolean values:" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Canada False\n", + "France False\n", + "Germany True\n", + "Italy False\n", + "Japan True\n", + "United Kingdom False\n", + "United States True\n", + "dtype: bool" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 45, + "source": [ + "condition = pd.Series([\n", + " False, False, True, False, True, False, True\n", + "], index=[\n", + " 'Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom', 'United States'\n", + "])\n", + "\n", + "condition" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": [ + "Germany 80.940\n", + "Japan 127.061\n", + "United States 500.000\n", + "Name: G7 Population in millions, dtype: float64" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 46, + "source": 
"g7_pop[condition]" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "> On upcoming lectures we'll see how to use more complex **conditional selections**." + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pandas/utils.py b/pandas/utils.py new file mode 100644 index 0000000..1e9771e --- /dev/null +++ b/pandas/utils.py @@ -0,0 +1,27 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns + +def apply_theme(): + pd.set_option('display.max_columns', 500) + pd.set_option('display.width', 1000) + pd.set_option('display.max_colwidth', 1000) + pd.set_option('display.float_format', '{:,.3f}'.format) + + flatui = ["#2e86de", "#ff4757", "#feca57", "#2ed573", "#ff7f50", "#00cec9", "#fd79a8", "#a4b0be"] + flatui_palette = sns.color_palette(flatui) + sns.palplot(flatui_palette) + sns.set_palette(flatui_palette) + + sns.set_style("darkgrid", { + 'axes.edgecolor': '#2b2b2b', + 'axes.facecolor': '#2b2b2b', + 'axes.labelcolor': '#919191', + 'figure.facecolor': '#2b2b2b', + 'grid.color': '#545454', + 'patch.edgecolor': '#2b2b2b', + 'text.color': '#bababa', + 'xtick.color': '#bababa', + 'ytick.color': '#bababa' + }) \ No newline at end of file