抱歉,您的浏览器无法访问本站
本页面需要浏览器支持(启用)JavaScript
了解详情 >

实用AI工具

图片

图片去背景

使用RMBG为图片去背景

from PIL import Image
import torch
from torchvision import transforms
from transformers import AutoModelForImageSegmentation
import os
from tqdm import tqdm

# Load the RMBG-2.0 segmentation model from the Hugging Face hub.
# trust_remote_code=True is passed because the checkpoint ships its own
# model code, which transformers will download and execute.
model = AutoModelForImageSegmentation.from_pretrained(
    'briaai/RMBG-2.0',
    trust_remote_code=True,
)
# Index 0 of the option list selects 'high'; 'highest' is the alternative.
torch.set_float32_matmul_precision(['high', 'highest'][0])
# Inference only: put the weights on the GPU and switch to eval mode.
model.to('cuda')
model.eval()

# Data settings
# Every input image is resized to this fixed size before being fed to the model.
image_size = (1024, 1024)
# Per-channel mean / std used for normalization
# (presumably the standard ImageNet statistics -- confirm against the model card).
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]
# Preprocessing pipeline: resize -> tensor -> per-channel normalization.
_preprocess_steps = [
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
]
transform_image = transforms.Compose(_preprocess_steps)

# Batch background removal: walk root_dir recursively, predict an alpha matte
# for every image with the RMBG model, and save the result as an RGBA PNG in a
# sibling "output" folder that mirrors the directory layout of root_dir.
root_dir = r'C:\Users\admin\Downloads\raw'
# Build the output root explicitly instead of str.replace('raw', 'output') on
# the full path: replace() would also rewrite "raw" inside file or folder
# names (e.g. "drawing.jpg" -> "doutputing.png").
output_dir = os.path.join(os.path.dirname(os.path.normpath(root_dir)), 'output')

# Collect all file paths up front so tqdm can display a total.
data = [
    os.path.join(dirpath, filename)
    for dirpath, _dirnames, filenames in os.walk(root_dir)
    for filename in filenames
]

# Send inputs to whatever device the model weights already live on.
device = next(model.parameters()).device
# The tensor -> PIL converter is stateless here; build it once, not per image.
to_pil = transforms.ToPILImage()

for file_path in tqdm(data):
    # os.walk yields every file, not just images. Skip anything Pillow cannot
    # read (PIL.UnidentifiedImageError is an OSError subclass) instead of
    # aborting the whole batch.
    try:
        with Image.open(file_path) as source_image:
            image = source_image.convert('RGB')
    except OSError as exc:
        tqdm.write(f'Skipping {file_path}: {exc}')
        continue
    input_images = transform_image(image).unsqueeze(0).to(device)

    # Prediction: take the last model output, squash it to [0, 1] with a
    # sigmoid and move it back to the CPU.
    with torch.no_grad():
        preds = model(input_images)[-1].sigmoid().cpu()
    pred = preds[0].squeeze()
    pred_pil = to_pil(pred)
    # The matte is predicted at the model input size; scale it back to the
    # original image size before using it as the alpha channel.
    mask = pred_pil.resize(image.size)
    image.putalpha(mask)

    # Mirror the path relative to root_dir under output_dir; always save as
    # PNG so the alpha channel is preserved.
    relative_stem, _ = os.path.splitext(os.path.relpath(file_path, root_dir))
    save_path = os.path.join(output_dir, f'{relative_stem}.png')
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    image.save(save_path)

图片查重

使用CLIP的图像编码器(encoder)对图片提取特征向量,再计算向量之间的余弦距离,距离越小说明两张图片越相似

音频

提取节奏点

使用madmom库提取音频节奏点

# Extract beat positions from an audio file with madmom.
# RNNBeatProcessor and CRFBeatDetectionProcessor are defined in
# madmom.features.beats; the original snippet only imported RNNBarProcessor
# from the downbeats module, so both names below raised NameError.
from madmom.features.beats import CRFBeatDetectionProcessor, RNNBeatProcessor
from madmom.features.downbeats import RNNBarProcessor

# NOTE: input_audio_path is not defined in this snippet; set it to the path of
# the audio file to analyse before running.
# fps=100 is presumably chosen to match the frame rate of the RNN beat
# activations -- confirm against the madmom documentation.
proc = CRFBeatDetectionProcessor(fps=100)
# Step 1: run the RNN to get a beat activation function for the audio.
act = RNNBeatProcessor()(input_audio_path)
# Step 2: decode the activations into beat positions with the CRF tracker.
data = proc(act)
# Convert the result array to a plain Python list for printing.
print(data.tolist())

评论