Skip to content

Commit

Permalink
update microdata_sample_2000 households
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed Jul 16, 2024
1 parent 9ee83ca commit 940c838
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
2 changes: 1 addition & 1 deletion data_prep/R/add_geography_cols.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ add_geography_cols <- function(arrw, year){
year == 1991 ~ 'code_muni',
year == 2010 ~ 'V0002',
year == 2022 ~ 'CD_MUN',
year == 2000 ~ 'V1103')
year == 2000 ~ 'V0103')

if(year %in% c(2000, 2010)){
arrw <- mutate(arrw,
Expand Down
16 changes: 8 additions & 8 deletions data_prep/R/microdata_sample_2000.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ lapply(X=txt_files,

# list all files
data_dir <- 'R:/Dropbox/bases_de_dados/censo_demografico/censo_2000/microdados_txt'
txt_files <- list.files(data_dir,
txt_files <- list.files(path = data_dir,
pattern = 'Dom[[:digit:]]|DOM',
recursive = TRUE,
full.names = TRUE)
Expand All @@ -49,23 +49,23 @@ lapply(X=txt_files,

# Define the dataset
DS <- arrow::open_dataset(sources = parqt_files)
# Create a scanner
SO <- Scanner$create(DS)
# Load it as an Arrow Table in memory
AT <- SO$ToTable()
rm(DS, SO); gc(T)
# # Create a scanner
# SO <- Scanner$create(DS)
# # Load it as an Arrow Table in memory
# AT <- SO$ToTable()
# rm(DS, SO); gc(T)

## 3.3) add geography variables ----------------------------------------------

# # drop row if all columns are NA
AT <- filter(AT, !is.na(PESO_DOMIC))
AT <- filter(DS, !is.na(P001))

AT <- add_geography_cols(arrw = AT, year = 2000)

head(AT) |> collect()

## 3.4) save single parquet file ----------------------------------------------
arrow::write_parquet(AT, './data/microdata_sample/2000/2000_households.parquet')
arrow::write_parquet(AT, './data/microdata_sample/2000/2000_households2.parquet')



Expand Down
2 changes: 1 addition & 1 deletion data_prep/read_guides/readguide_2000_households.csv
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ int_pos;var_name;x;label;length;decimal_places;fin_pos;col_type;CHAR
143;V7409;2.;;2;0;144;i;FALSE
145;V7616;6.;;6;0;150;i;FALSE
151;V7617;6.2;;6;2;156;d;FALSE
157;PESO_DOMIC;11.8;;11;8;167;d;FALSE
157;P001;11.8;;11;8;167;d;FALSE
168;v1111;$1.;;1;0;168;c;TRUE
169;v1112;$1.;;1;0;169;c;TRUE
170;v1113;$1.;;1;0;170;c;TRUE

0 comments on commit 940c838

Please sign in to comment.