test: add test case for import without meta when dynamic field is enabled
Signed-off-by: zhuwenxing <[email protected]>
zhuwenxing committed Apr 29, 2024
1 parent 4da8b66 commit 4c4b06a
Showing 2 changed files with 23 additions and 16 deletions.
16 changes: 8 additions & 8 deletions tests/python_client/common/bulk_insert_data.py
@@ -615,7 +615,7 @@ def gen_new_json_files(float_vector, rows, dim, data_fields, file_nums=1, array_
     return files


-def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_nums=1, err_type="", force=False, enable_dynamic_field=False):
+def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_nums=1, err_type="", force=False, enable_dynamic_field=False, include_meta=True):
     # gen numpy files
     files = []
     start_uid = 0
@@ -639,7 +639,7 @@ def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_num
                 file_name = gen_int_or_float_in_numpy_file(dir=data_source, data_field=data_field,
                                                            rows=rows, force=force)
                 files.append(file_name)
-        if enable_dynamic_field:
+        if enable_dynamic_field and include_meta:
             file_name = gen_dynamic_field_in_numpy_file(dir=data_source, rows=rows, force=force)
             files.append(file_name)
         if file_size is not None:
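Worth pausing on: the new include_meta flag separates "the collection has the dynamic field enabled" from "a $meta source file accompanies the import". A minimal sketch of the gate, with hypothetical helper and file names (not code from this commit):

# Sketch only; mirrors the `enable_dynamic_field and include_meta` gate above.
def npy_file_list(data_fields, enable_dynamic_field=False, include_meta=True):
    files = [f"{field}.npy" for field in data_fields]
    if enable_dynamic_field and include_meta:
        files.append("$meta.npy")    # the meta file is now opt-out, not implied
    return files

# Dynamic field on, meta deliberately omitted -> no $meta.npy in the batch.
assert npy_file_list(["float_vector"], True, include_meta=False) == ["float_vector.npy"]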
@@ -688,7 +688,7 @@ def gen_dynamic_field_data_in_parquet_file(rows, start=0):
     return data


-def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_group_size=None, file_nums=1, array_length=None, err_type="", enable_dynamic_field=False):
+def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_group_size=None, file_nums=1, array_length=None, err_type="", enable_dynamic_field=False, include_meta=True):
     # gen numpy files
     if err_type == "":
         err_type = "none"
@@ -703,7 +703,7 @@ def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_
             data = gen_data_by_data_field(data_field=data_field, rows=rows, start=0,
                                           float_vector=float_vector, dim=dim, array_length=array_length)
             all_field_data[data_field] = data
-        if enable_dynamic_field:
+        if enable_dynamic_field and include_meta:
             all_field_data["$meta"] = gen_dynamic_field_data_in_parquet_file(rows=rows, start=0)
         df = pd.DataFrame(all_field_data)
         log.info(f"df: \n{df}")
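The parquet generator applies the same gate while assembling the DataFrame: the $meta column is only attached when both flags are true. A hedged sketch (the dict payload is illustrative; the real values come from gen_dynamic_field_data_in_parquet_file):

import pandas as pd

def parquet_frame(all_field_data, rows, enable_dynamic_field=False, include_meta=True):
    # Attach the dynamic-field column only when it is enabled AND requested.
    if enable_dynamic_field and include_meta:
        all_field_data["$meta"] = [{"name": f"name_{i}", "address": i} for i in range(rows)]
    return pd.DataFrame(all_field_data)

df = parquet_frame({"uid": list(range(3))}, rows=3, enable_dynamic_field=True, include_meta=False)
assert "$meta" not in df.columns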
@@ -835,7 +835,7 @@ def prepare_bulk_insert_new_json_files(minio_endpoint="", bucket_name="milvus-bu


 def prepare_bulk_insert_numpy_files(minio_endpoint="", bucket_name="milvus-bucket", rows=100, dim=128, enable_dynamic_field=False, file_size=None,
-                                    data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False):
+                                    data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False, include_meta=True):
     """
     Generate column based files based on params in numpy format and copy them to the minio
     Note: each field in data_fields would be generated one numpy file.
@@ -867,14 +867,14 @@ def prepare_bulk_insert_numpy_files(minio_endpoint="", bucket_name="milvus-bucke
     """
     files = gen_npy_files(rows=rows, dim=dim, float_vector=float_vector, file_size=file_size,
                           data_fields=data_fields, enable_dynamic_field=enable_dynamic_field,
-                          file_nums=file_nums, force=force)
+                          file_nums=file_nums, force=force, include_meta=include_meta)

     copy_files_to_minio(host=minio_endpoint, r_source=data_source, files=files, bucket_name=bucket_name, force=force)
     return files


 def prepare_bulk_insert_parquet_files(minio_endpoint="", bucket_name="milvus-bucket", rows=100, dim=128, array_length=None, file_size=None, row_group_size=None,
-                                      enable_dynamic_field=False, data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False):
+                                      enable_dynamic_field=False, data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False, include_meta=True):
     """
     Generate column based files based on params in parquet format and copy them to the minio
     Note: each field in data_fields would be generated one parquet file.
@@ -906,7 +906,7 @@ def prepare_bulk_insert_parquet_files(minio_endpoint="", bucket_name="milvus-buc
     """
     files = gen_parquet_files(rows=rows, dim=dim, float_vector=float_vector, enable_dynamic_field=enable_dynamic_field,
                               data_fields=data_fields, array_length=array_length, file_size=file_size, row_group_size=row_group_size,
-                              file_nums=file_nums)
+                              file_nums=file_nums, include_meta=include_meta)
     copy_files_to_minio(host=minio_endpoint, r_source=data_source, files=files, bucket_name=bucket_name, force=force)
     return files
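At the call-site level, a test opts out per import. A hedged usage sketch of the numpy variant (endpoint and values are placeholders; prepare_bulk_insert_parquet_files takes the identical flag):

# Upload numpy import files for a dynamic-field collection, but without $meta.npy:
files = prepare_bulk_insert_numpy_files(
    minio_endpoint="127.0.0.1:9000",   # placeholder; assumes a reachable MinIO
    bucket_name="milvus-bucket",
    rows=1000,
    dim=128,
    data_fields=[DataField.vec_field],
    enable_dynamic_field=True,
    include_meta=False,                # the new opt-out
    force=True,
)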
23 changes: 15 additions & 8 deletions tests/python_client/testcases/test_bulk_insert.py
@@ -947,7 +947,8 @@ def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities
     @pytest.mark.parametrize("dim", [128])  # 128
     @pytest.mark.parametrize("entities", [1000])  # 1000
     @pytest.mark.parametrize("enable_dynamic_field", [True, False])
-    def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field):
+    @pytest.mark.parametrize("include_meta", [True, False])
+    def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field, include_meta):
         """
         collection schema 1: [pk, int64, float64, string float_vector]
         data file: vectors.npy and uid.npy,
@@ -957,6 +958,8 @@ def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_d
         2. import data
         3. verify
         """
+        if enable_dynamic_field is False and include_meta is True:
+            pytest.skip("include_meta only works with enable_dynamic_field")
         fields = [
             cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
             cf.gen_int64_field(name=df.int_field),
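One reading aid for the new guard: when the collection has no dynamic field, include_meta has nothing to gate, so only one of the two redundant combinations is kept. The effective matrix (an editorial summary, not code from the commit):

# enable_dynamic_field=True,  include_meta=True   -> $meta generated and imported
# enable_dynamic_field=True,  include_meta=False  -> import must succeed without $meta
# enable_dynamic_field=False, include_meta=False  -> flag irrelevant, runs once
# enable_dynamic_field=False, include_meta=True   -> skipped by the guard above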
@@ -977,6 +980,8 @@ def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_d
             data_fields=data_fields,
             enable_dynamic_field=enable_dynamic_field,
             force=True,
+            include_meta=include_meta,
+
         )
         self._connect()
         c_name = cf.gen_unique_str("bulk_insert")
@@ -1031,7 +1036,7 @@ def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_d
             fields_from_search = r.fields.keys()
             for f in fields:
                 assert f.name in fields_from_search
-            if enable_dynamic_field:
+            if enable_dynamic_field and include_meta:
                 assert "name" in fields_from_search
                 assert "address" in fields_from_search

@@ -1052,18 +1057,17 @@ def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_d
             fields_from_search = r.fields.keys()
             for f in fields:
                 assert f.name in fields_from_search
-            if enable_dynamic_field:
+            if enable_dynamic_field and include_meta:
                 assert "name" in fields_from_search
                 assert "address" in fields_from_search

     @pytest.mark.tags(CaseLabel.L3)
     @pytest.mark.parametrize("auto_id", [True, False])
     @pytest.mark.parametrize("dim", [128])  # 128
     @pytest.mark.parametrize("entities", [1000])  # 1000
-    @pytest.mark.parametrize("file_nums", [1])
-    @pytest.mark.parametrize("array_len", [None, 0, 100])
     @pytest.mark.parametrize("enable_dynamic_field", [True, False])
-    def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, file_nums, array_len, enable_dynamic_field):
+    @pytest.mark.parametrize("include_meta", [True, False])
+    def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, include_meta):
         """
         collection schema 1: [pk, int64, float64, string float_vector]
         data file: vectors.parquet and uid.parquet,
@@ -1072,6 +1076,8 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, file_n
         2. import data
         3. verify
         """
+        if enable_dynamic_field is False and include_meta is True:
+            pytest.skip("include_meta only works with enable_dynamic_field")
         fields = [
             cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
             cf.gen_int64_field(name=df.int_field),
@@ -1096,6 +1102,7 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, file_n
             data_fields=data_fields,
             enable_dynamic_field=enable_dynamic_field,
             force=True,
+            include_meta=include_meta,
         )
         self._connect()
         c_name = cf.gen_unique_str("bulk_insert")
@@ -1150,7 +1157,7 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, file_n
             fields_from_search = r.fields.keys()
             for f in fields:
                 assert f.name in fields_from_search
-            if enable_dynamic_field:
+            if enable_dynamic_field and include_meta:
                 assert "name" in fields_from_search
                 assert "address" in fields_from_search

@@ -1171,7 +1178,7 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, file_n
             fields_from_search = r.fields.keys()
             for f in fields:
                 assert f.name in fields_from_search
-            if enable_dynamic_field:
+            if enable_dynamic_field and include_meta:
                 assert "name" in fields_from_search
                 assert "address" in fields_from_search

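The assertions in both tests reduce to one check: dynamic keys appear in results only when the $meta file was actually imported. A hedged standalone sketch using pymilvus Collection.query; `collection`, `pk_field`, `schema_fields`, and the two flags are assumed to be in scope, and the real tests go through the suite's collection wrapper instead:

res = collection.query(expr=f"{pk_field} >= 0", output_fields=["*"])
for row in res:                                  # each row is a dict of field -> value
    for f in schema_fields:
        assert f.name in row                     # declared schema fields are always returned
    if enable_dynamic_field and include_meta:    # dynamic keys exist only if $meta was imported
        assert "name" in row and "address" in row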
