|
1003 | 1003 | "metadata": {},
|
1004 | 1004 | "outputs": [],
|
1005 | 1005 | "source": [
|
1006 |
| - "!wc -l data/earthquakes.csv" |
| 1006 | + "!wc -l data/01/earthquakes.csv" |
1007 | 1007 | ]
|
1008 | 1008 | },
|
1009 | 1009 | {
|
|
1014 | 1014 | "**Windows users**: if the above doesn't work for you (depends on your setup), then use this instead:\n",
|
1015 | 1015 | "\n",
|
1016 | 1016 | "```python\n",
|
1017 |
| - "!find /c /v \"\" data\\earthquakes.csv\n", |
| 1017 | + "!find /c /v \"\" data\\01\\earthquakes.csv\n", |
1018 | 1018 | "```\n",
|
1019 | 1019 | "\n",
|
1020 | 1020 | "\n",
|
|
1030 | 1030 | "metadata": {},
|
1031 | 1031 | "outputs": [],
|
1032 | 1032 | "source": [
|
1033 |
| - "!ls -lh data | grep earthquakes.csv" |
| 1033 | + "!ls -lh data/01 | grep earthquakes.csv" |
1034 | 1034 | ]
|
1035 | 1035 | },
|
1036 | 1036 | {
|
|
1041 | 1041 | "**Windows users**: if the above doesn't work for you (depends on your setup), then use this instead:\n",
|
1042 | 1042 | "\n",
|
1043 | 1043 | "```python\n",
|
1044 |
| - "!dir data | findstr \"earthquakes.csv\"\n", |
| 1044 | + "!dir data\\01 | findstr \"earthquakes.csv\"\n", |
1045 | 1045 | "```\n",
|
1046 | 1046 | "\n",
|
1047 | 1047 | "We can even capture the result of a command and use it in our Python code:"
|
|
1054 | 1054 | "metadata": {},
|
1055 | 1055 | "outputs": [],
|
1056 | 1056 | "source": [
|
1057 |
| - "files = !ls -lh data\n", |
| 1057 | + "files = !ls -lh data/01\n", |
1058 | 1058 | "[file for file in files if 'earthquake' in file]"
|
1059 | 1059 | ]
|
1060 | 1060 | },
|
|
1066 | 1066 | "**Windows users**: if the above doesn't work for you (depends on your setup), then use this instead:\n",
|
1067 | 1067 | "\n",
|
1068 | 1068 | "```python\n",
|
1069 |
| - "files = !dir data\n", |
| 1069 | + "files = !dir data\\01\n", |
1070 | 1070 | "[file for file in files if 'earthquake' in file]\n",
|
1071 | 1071 | "```"
|
1072 | 1072 | ]
|
|
1088 | 1088 | "metadata": {},
|
1089 | 1089 | "outputs": [],
|
1090 | 1090 | "source": [
|
1091 |
| - "!head -n 2 data/earthquakes.csv" |
| 1091 | + "!head -n 2 data/01/earthquakes.csv" |
1092 | 1092 | ]
|
1093 | 1093 | },
|
1094 | 1094 | {
|
|
1100 | 1100 | "\n",
|
1101 | 1101 | "```python\n",
|
1102 | 1102 | "n = 2\n",
|
1103 |
| - "with open('data/earthquakes.csv', 'r') as file:\n", |
| 1103 | + "with open('data/01/earthquakes.csv', 'r') as file:\n", |
1104 | 1104 | " for _ in range(n):\n",
|
1105 | 1105 | " print(file.readline(), end='\\r')\n",
|
1106 | 1106 | "```\n",
|
|
1116 | 1116 | "metadata": {},
|
1117 | 1117 | "outputs": [],
|
1118 | 1118 | "source": [
|
1119 |
| - "!tail -n 1 data/earthquakes.csv" |
| 1119 | + "!tail -n 1 data/01/earthquakes.csv" |
1120 | 1120 | ]
|
1121 | 1121 | },
|
1122 | 1122 | {
|
|
1129 | 1129 | "```python\n",
|
1130 | 1130 | "import os\n",
|
1131 | 1131 | "\n",
|
1132 |
| - "with open('data/earthquakes.csv', 'rb') as file:\n", |
| 1132 | + "with open('data/01/earthquakes.csv', 'rb') as file:\n", |
1133 | 1133 | " file.seek(0, os.SEEK_END)\n",
|
1134 | 1134 | " while file.read(1) != b'\\n':\n",
|
1135 | 1135 | " file.seek(-2, os.SEEK_CUR)\n",
|
|
1140 | 1140 | "\n",
|
1141 | 1141 | "```python\n",
|
1142 | 1142 | "n = 2\n",
|
1143 |
| - "with open('data/earthquakes.csv', 'r') as file:\n", |
| 1143 | + "with open('data/01/earthquakes.csv', 'r') as file:\n", |
1144 | 1144 | " print('\\r'.join(file.readlines()[-n:]))\n",
|
1145 | 1145 | "```\n",
|
1146 | 1146 | "\n"
|
|
1164 | 1164 | "metadata": {},
|
1165 | 1165 | "outputs": [],
|
1166 | 1166 | "source": [
|
1167 |
| - "!awk -F',' '{print NF; exit}' data/earthquakes.csv" |
| 1167 | + "!awk -F',' '{print NF; exit}' data/01/earthquakes.csv" |
1168 | 1168 | ]
|
1169 | 1169 | },
|
1170 | 1170 | {
|
|
1175 | 1175 | "**Windows users**: if the above or below don't work for you (depends on your setup), then use this instead:\n",
|
1176 | 1176 | "\n",
|
1177 | 1177 | "```python\n",
|
1178 |
| - "with open('data/earthquakes.csv', 'r') as file:\n", |
| 1178 | + "with open('data/01/earthquakes.csv', 'r') as file:\n", |
1179 | 1179 | " print(len(file.readline().split(',')))\n",
|
1180 | 1180 | "```\n",
|
1181 | 1181 | "\n",
|
|
1190 | 1190 | "metadata": {},
|
1191 | 1191 | "outputs": [],
|
1192 | 1192 | "source": [
|
1193 |
| - "headers = !head -n 1 data/earthquakes.csv\n", |
| 1193 | + "headers = !head -n 1 data/01/earthquakes.csv\n", |
1194 | 1194 | "len(headers[0].split(','))"
|
1195 | 1195 | ]
|
1196 | 1196 | },
|
|
1220 | 1220 | "metadata": {},
|
1221 | 1221 | "outputs": [],
|
1222 | 1222 | "source": [
|
1223 |
| - "df = pd.read_csv('data/earthquakes.csv')" |
| 1223 | + "df = pd.read_csv('data/01/earthquakes.csv')" |
1224 | 1224 | ]
|
1225 | 1225 | },
|
1226 | 1226 | {
|
|
2155 | 2155 | "pd.concat([tsunami, no_tsunami]).shape"
|
2156 | 2156 | ]
|
2157 | 2157 | },
|
2158 |
| - { |
2159 |
| - "cell_type": "markdown", |
2160 |
| - "id": "d38495fa-fe5e-4937-9774-b90c0d26e6d9", |
2161 |
| - "metadata": {}, |
2162 |
| - "source": [ |
2163 |
| - "Note that the previous result is equivalent to running the `append()` method of the dataframe:" |
2164 |
| - ] |
2165 |
| - }, |
2166 |
| - { |
2167 |
| - "cell_type": "code", |
2168 |
| - "execution_count": null, |
2169 |
| - "id": "8c6be158-f310-42b7-a05e-cd4b6a6e07b2", |
2170 |
| - "metadata": {}, |
2171 |
| - "outputs": [], |
2172 |
| - "source": [ |
2173 |
| - "tsunami.append(no_tsunami).shape" |
2174 |
| - ] |
2175 |
| - }, |
2176 | 2158 | {
|
2177 | 2159 | "cell_type": "markdown",
|
2178 | 2160 | "id": "ba559768-6848-4eca-9b84-04b8b6e78417",
|
|
2189 | 2171 | "outputs": [],
|
2190 | 2172 | "source": [
|
2191 | 2173 | "additional_columns = pd.read_csv(\n",
|
2192 |
| - " 'data/earthquakes.csv', usecols=['tz', 'felt', 'ids']\n", |
| 2174 | + " 'data/01/earthquakes.csv', usecols=['tz', 'felt', 'ids']\n", |
2193 | 2175 | ")\n",
|
2194 | 2176 | "pd.concat([df.head(2), additional_columns.head(2)], axis=1)"
|
2195 | 2177 | ]
|
|
2210 | 2192 | "outputs": [],
|
2211 | 2193 | "source": [
|
2212 | 2194 | "additional_columns = pd.read_csv(\n",
|
2213 |
| - " 'data/earthquakes.csv', usecols=['tz', 'felt', 'ids', 'time'], index_col='time'\n", |
| 2195 | + " 'data/01/earthquakes.csv', usecols=['tz', 'felt', 'ids', 'time'], index_col='time'\n", |
2214 | 2196 | ")\n",
|
2215 | 2197 | "pd.concat([df.head(2), additional_columns.head(2)], axis=1)"
|
2216 | 2198 | ]
|
|
3618 | 3600 | "metadata": {},
|
3619 | 3601 | "outputs": [],
|
3620 | 3602 | "source": [
|
3621 |
| - "extra_data = long_df.append([{\n", |
| 3603 | + "extra_data = pd.DataFrame([{\n", |
3622 | 3604 | " 'datatype': 'TAVG', \n",
|
3623 | 3605 | " 'date': '2018-10-01', \n",
|
3624 | 3606 | " 'temp_C': 10, \n",
|
3625 | 3607 | " 'temp_F': 50\n",
|
3626 | 3608 | "}]).set_index(['date', 'datatype']).sort_index()\n",
|
3627 | 3609 | "\n",
|
3628 |
| - "extra_data['2018-10-01':'2018-10-02']" |
| 3610 | + "extra_data = pd.concat([long_df, extra_data])\n", |
| 3611 | + "\n", |
| 3612 | + "extra_data.head()" |
3629 | 3613 | ]
|
3630 | 3614 | },
|
3631 | 3615 | {
|
|
0 commit comments