I have a simple (or maybe not so simple) question.
How can I set the number of rows (num_rows) for the synthetic data generated by HMASynthesizer?
Tables:
`regions`:

|   | region_id | address           |
|---|-----------|-------------------|
| 0 | r_0       | Cohenville        |
| 1 | r_1       | Lake Martha       |
| 2 | r_2       | West Josephfurt   |
| 3 | r_3       | East Valerieshire |
| 4 | r_4       | Madisonport       |
`users`:

|   | user_id | names    | region_id |
|---|---------|----------|-----------|
| 0 | 0       | Tammy    | r_2       |
| 1 | 1       | Edward   | r_1       |
| 2 | 2       | Veronica | r_3       |
| 3 | 3       | Kelly    | r_1       |
| 4 | 4       | Jennifer | r_0       |
import pandas as pd
from faker import Faker
from sdv.metadata import MultiTableMetadata
from sdv.multi_table import HMASynthesizer
# Build multi-table SDV metadata for the `users` and `regions` dataframes,
# fit an HMASynthesizer on them, and draw a synthetic sample.
# `df` (users) and `regions` are expected to be defined before this point.

# Fixed: the locale string was missing its closing quote (SyntaxError).
fake = Faker('en_US')

multi_table_data = {
    'users': df,
    'regions': regions,
}

metadata = MultiTableMetadata()

# --- users table -----------------------------------------------------------
metadata.detect_table_from_dataframe(
    table_name='users',
    data=df,
)
# NOTE(review): a one-digit pattern can only produce 10 distinct ids; widen it
# (e.g. '[0-9]{4}') before sampling more users than that -- confirm against SDV.
metadata.update_column(
    table_name='users',
    column_name='user_id',
    sdtype='id',
    regex_format='[0-9]{1}',
)
# NOTE(review): this pattern differs from the parent key pattern
# ('r_[0-9]{1}'); the synthetic output shows values like 'r_0', so the
# foreign key appears to follow the parent table -- verify.
metadata.update_column(
    table_name='users',
    column_name='region_id',
    sdtype='id',
    regex_format='[A-Za-z]{3}',
)
metadata.update_column(
    table_name='users',
    column_name='names',
    sdtype='first_name',
)
metadata.set_primary_key(
    table_name='users',
    column_name='user_id',
)

# --- regions table ---------------------------------------------------------
metadata.detect_table_from_dataframe(
    table_name='regions',
    data=regions,
)
metadata.update_column(
    table_name='regions',
    column_name='region_id',
    sdtype='id',
    regex_format='r_[0-9]{1}',
)
metadata.update_column(
    table_name='regions',
    column_name='address',
    sdtype='address',
)
metadata.set_primary_key(
    table_name='regions',
    column_name='region_id',
)

# regions (parent) 1 --- n users (child), joined on region_id.
metadata.add_relationship(
    parent_table_name='regions',
    child_table_name='users',
    parent_primary_key='region_id',
    child_foreign_key='region_id',
)

synthesizer = HMASynthesizer(metadata, locales=['en_US'])
synthesizer.fit(multi_table_data)

# Multi-table synthesizers take `scale` rather than `num_rows`: it multiplies
# the size of the original tables (1.0 keeps roughly the original size,
# 2.0 roughly doubles it). 1.0 is the default, so behaviour is unchanged here.
synthetic_data = synthesizer.sample(scale=1.0)
Synthetic data (I can't control the size of the resulting dataframes):
`users` (synthetic):

|   | user_id | names    | region_id |
|---|---------|----------|-----------|
| 0 | 0       | Jennifer | r_0       |
| 1 | 1       | David    | r_1       |
| 2 | 2       | Thomas   | r_1       |
| 3 | 3       | Claudia  | r_2       |
| 4 | 4       | Bruce    | r_3       |
| 5 | 5       | Lisa     | r_3       |
| 6 | 6       | Andrew   | r_4       |
`regions` (synthetic):

|   | region_id | address                                            |
|---|-----------|----------------------------------------------------|
| 0 | r_0       | 77809 Rush Mountain Suite 952 Garciaton, DC 32497  |
| 1 | r_1       | USS Morrow FPO AP 28106                            |
| 2 | r_2       | 123 Dennis Points Humphreymouth, IN 32470          |
| 3 | r_3       | Unit 2560 Box 7577 DPO AA 88965                    |
| 4 | r_4       | 46038 Karen Via Apt. 979 Arnoldmouth, PA 22955     |