You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

265 lines
7.4 KiB
Julia

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# То же самое, но на julia
# Версия 1.0 (рабочая)
using Logging
using Printf
using Dates
using CRC32c
using JSON
using Base.Threads
using MySQL
# using DBInterface
# debuglogger = ConsoleLogger(stderr, Logging.Debug)
# global_logger(debuglogger)
# правильный перевод!
"""
    humanbytes(B)

Return the byte count `B` as a human-friendly string with two decimals and a
`Bytes`, `KB`, `MB`, `GB` or `TB` suffix. Units are binary (1 KB = 1024 bytes).
"""
function humanbytes(B)
    bytes = float(B)
    KB = 1024.0
    MB = KB^2  # 1,048,576
    GB = KB^3  # 1,073,741,824
    TB = KB^4  # 1,099,511,627,776
    if bytes < KB
        return @sprintf("%.2f Bytes", bytes)
    elseif bytes < MB
        return @sprintf("%.2f KB", bytes / KB)
    elseif bytes < GB
        return @sprintf("%.2f MB", bytes / MB)
    elseif bytes < TB
        return @sprintf("%.2f GB", bytes / GB)
    else
        # Scale by TB here; dividing by KB would report a count of kibibytes
        # under the "TB" label.
        return @sprintf("%.2f TB", bytes / TB)
    end
end
"""
    humanbitrate(s)

Evaluate a ratio written as the string `"a/b"` and return the quotient
formatted with one decimal place, with surrounding whitespace removed.

# Arguments
- `s`: a string of the form `"a/b"`, where `a` and `b` parse as `Float64`.
"""
function humanbitrate(s)
    parts = split(s, "/")
    numerator = parse(Float64, parts[1])
    denominator = parse(Float64, parts[2])
    padded = @sprintf("%8.1f", numerator / denominator)
    return strip(padded)
end
"""
    new_name(name)

Shorten a file name that contains one of the known keywords.

The extension is split off, and in the remaining stem everything from the last
`" <keyword>"` onward is dropped; the extension is then re-attached. If the
pattern does not match the stem, or no keyword occurs in `name`, the name is
returned unchanged.

Only the first keyword found is applied. The keywords live in a `Dict`, so when
several occur in `name` the one that wins follows the `Dict` iteration order.
"""
function new_name(name)
    patterns = Dict(
        "Сек" => r"(.+) Сек.*",
        "Сцен" => r"(.+) Сцен.*",
        "Голые" => r"(.+) Голые.*",
        "Откро" => r"(.+) Откро.*",
        "Посте" => r"(.+) Посте.*",
        "Изнас" => r"(.+) Изнас.*",
        "Инцес" => r"(.+) Инцес.*",
        "Лесб" => r"(.+) Лесб.*",
        "Эро" => r"(.+) Эро.*"
    )
    for (keyword, pattern) in patterns
        occursin(keyword, name) || continue
        stem, ext = splitext(name)
        shortened = replace(stem, pattern => s"\1")
        return string(shortened, ext)
    end
    return name
end
"""
    valid_file(filename)

Return `true` when `filename` has one of the supported video extensions
(`.avi`, `.mkv`, `.mp4`, `.webm`, `.mpeg`).

The comparison ignores letter case, so `MOVIE.AVI` and `clip.Mp4` are accepted
as well.
"""
function valid_file(filename)
    _, ext = splitext(filename)
    return lowercase(ext) in (".avi", ".mkv", ".mp4", ".webm", ".mpeg")
end
"""
    humandate(d)

Format the Unix timestamp `d` (seconds since the epoch) as `dd.mm.yyyy`.
"""
function humandate(d)
    moment = Dates.unix2datetime(d)
    return Dates.format(moment, "dd.mm.yyyy")
end
"""
    file_info(filename)

Collect file-system facts about `filename` and return them as a `Dict` with the
keys `"filename"`, `"size"`, `"hsize"`, `"mtime"`, `"ctime"`, `"hmtime"`,
`"hctime"`, `"ext"` and `"crc"`.

`"crc"` is the CRC-32c checksum of the whole file content, as an `Int`. The
`h*` keys hold human-readable renderings of the corresponding raw values.
"""
function file_info(filename)
    _, ext = splitext(filename)
    info = stat(filename)
    # `open(f, path)` closes the handle as soon as the checksum is computed;
    # a bare `open(path)` would keep one descriptor open per scanned file.
    crc = open(CRC32c.crc32c, filename)
    return Dict{String,Any}(
        "filename" => filename,
        "size" => info.size,
        "hsize" => humanbytes(info.size),
        "mtime" => info.mtime,
        "ctime" => info.ctime,
        "hmtime" => humandate(info.mtime),
        "hctime" => humandate(info.ctime),
        "ext" => ext,
        "crc" => Int(crc),
    )
end
"""
    get_video_stream(stream)

Return the index of the first entry of `stream` whose `"codec_type"` is
`"video"`, or `-1` when there is none.
"""
function get_video_stream(stream)
    idx = findfirst(entry -> entry["codec_type"] == "video", stream)
    return something(idx, -1)
end
"""
    get_audio_stream(stream)

Return the index of the first entry of `stream` whose `"codec_type"` is
`"audio"`, or `-1` when there is none.
"""
function get_audio_stream(stream)
    idx = findfirst(entry -> entry["codec_type"] == "audio", stream)
    return something(idx, -1)
end
"""
    file_meta(filename)

Run `ffprobe` on `filename` and return a `Dict` describing the media file.

Streams may appear in any order in the `ffprobe` output, so the index of the
video stream is looked up first and width, height, codec and frame rate are
read from that stream.

Keys of the result:
- `"duration"`: the duration in seconds converted with `Dates.unix2datetime`,
  i.e. a `DateTime` offset from the Unix epoch.
- `"hduration"`: the same value formatted as `HH:MM:SS`. Only the time-of-day
  part is formatted, so durations of 24 hours or more wrap around.
- `"width"`, `"height"`, `"codec"`: taken from the video stream.
- `"bit_rate"`: despite the key name, this is the stream's `avg_frame_rate`
  evaluated by `humanbitrate`.
- `"audio"`: `true` when the file has an audio stream.
- `"type"`: `"clip"` for durations under ten minutes, otherwise `"film"`.
- `"tags"`, `"newname"`, `"poster"`: empty-string placeholders.

# Throws
- `ArgumentError` when `ffprobe` reports no video stream for `filename`.
"""
function file_meta(filename)
    cmd = `ffprobe -v quiet -print_format json -show_format -show_streams "$filename"`
    data = JSON.parse(read(cmd, String))
    streams = data["streams"]
    n = get_video_stream(streams)
    # -1 means "not found"; indexing with it would only produce an opaque
    # BoundsError, so fail with a message that names the file instead.
    n == -1 && throw(ArgumentError("no video stream found in: $filename"))
    video = streams[n]
    has_audio = get_audio_stream(streams) != -1
    dur = parse(Float64, data["format"]["duration"])
    hdur = Dates.unix2datetime(dur)
    hduration = Dates.format(hdur, "HH:MM:SS")
    return Dict{String,Any}(
        "duration" => hdur,
        "hduration" => hduration,
        "width" => video["width"],
        "height" => video["height"],
        "bit_rate" => humanbitrate(video["avg_frame_rate"]),
        "codec" => video["codec_name"],
        "audio" => has_audio,
        "tags" => "",
        # Zero-padded HH:MM:SS strings compare lexicographically in time order.
        "type" => hduration < "00:10:00" ? "clip" : "film",
        "newname" => "",
        "poster" => "",
    )
end
"""
    check_file(filename)

Gather everything known about `filename`: the media metadata from `file_meta`
is merged into the file-system information from `file_info`, and the combined
`Dict` is returned. On a key present in both, the `file_meta` value wins.
"""
function check_file(filename)
    # Probe the media first, exactly as before, so an unreadable video fails
    # before the file is checksummed.
    probe = file_meta(filename)
    info = file_info(filename)
    return merge!(info, probe)
end
"""
    main(host_id::Integer; dir="d:\\\\vids\\\\ero\\\\2")

Scan `dir` for supported video files, collect metadata for each one and store
it in the `vid` database, tagging every record with `host_id`.

# Arguments
- `host_id`: identifier stored in the `"host_id"` field of every record.

# Keywords
- `dir`: directory to scan (not recursive). Defaults to the path that was
  previously hard-coded.

The database connection is closed even when processing a file throws.
"""
function main(host_id::Integer; dir::AbstractString="d:\\vids\\ero\\2")
    @info "Обработка каталога: $dir"
    files = readdir(dir, join=true)
    n = length(files)  # counts every directory entry, not only the video files
    println("всего: $n files")
    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to environment variables or a config file kept out of version control.
    conn = DBInterface.connect(MySQL.Connection, "xigmanas", "itman", "X753951x", db="vid")
    try
        @time begin
            for file in filter(valid_file, files)
                meta = check_file(file)
                meta["host_id"] = host_id
                save_db(conn, meta)
            end
        end
        println("обработано")
    finally
        # Release the connection on both the success and the error path.
        close(conn)
    end
    return nothing
end
"""
    exist(conn, crc::Integer)

Return `true` when the `filemeta` table already holds at least one row whose
`crc` column equals `crc`.
"""
function exist(conn, crc::Integer)
    # `crc` is constrained to `Integer`, so interpolating it cannot inject SQL.
    sql = "select count(*) as cnt from filemeta where crc = $crc"
    rows = DBInterface.execute(conn, sql)
    return first(rows).cnt > 0
end
"""
    save_db(conn, m)

Store the metadata `Dict` `m` as a new row of the `filemeta` table, unless a
row with the same `crc` already exists (in which case only a log message is
emitted).

Values are sent as bound parameters of a prepared statement, so file names and
other strings may contain quotes or backslashes without breaking the
statement. Always returns `nothing`.

# Arguments
- `conn`: an open database connection.
- `m`: a `Dict` with the keys produced by `check_file` plus `"host_id"`.
"""
function save_db(conn, m)
    filename = m["filename"]
    if exist(conn, m["crc"])
        @info "уже имеется: $filename"
        return nothing
    end
    println("сохраняю: $filename")
    # Column name => value, kept side by side so the column list and the
    # parameter list cannot drift apart.
    row = Pair{String,Any}[
        "host_id" => m["host_id"],
        "filename" => filename,  # bound as-is: no manual backslash escaping needed
        "newname" => m["newname"],
        "size" => m["size"],
        "hsize" => m["hsize"],
        "duration" => m["hduration"],
        "width" => m["width"],
        "height" => m["height"],
        "bitrate" => string(m["bit_rate"]),
        "ext" => m["ext"],
        "crc" => m["crc"],
        "ctime" => m["hctime"],
        "mtime" => m["hmtime"],
        "poster" => m["poster"],
        "audio" => Int(m["audio"]),  # 1/0, the same values SQL `true`/`false` yield
        "codec" => m["codec"],
        "tags" => m["tags"],
        "type" => m["type"],
    ]
    columns = join(first.(row), ", ")
    marks = join(fill("?", length(row)), ", ")
    stmt = DBInterface.prepare(conn, "insert into filemeta ($columns) values ($marks)")
    try
        println("file: ", filename)
        DBInterface.execute(stmt, last.(row))
    finally
        DBInterface.close!(stmt)
    end
    return nothing
end
"""
    openDB()

Connect to the `sea` database and print the `ext` and `newurl` columns of up
to two `filemeta` rows whose extension is `.webp`. The connection is closed
before returning, including when the query fails.
"""
function openDB()
    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to environment variables or a config file kept out of version control.
    conn = DBInterface.connect(MySQL.Connection, "xigmanas", "itman", "X753951x", db="sea")
    try
        limit = 2
        ext = ".webp"
        # Both interpolated values are local constants, not external input.
        cmd = "select ext, newurl from filemeta where ext = '$ext' limit $limit"
        for row in DBInterface.execute(conn, cmd)
            println(row.ext, ' ', row.newurl)
        end
    finally
        close(conn)
    end
    return nothing
end
#=
-------- main program --------------
=#
# parallel_execution()
# openDB()
# Script entry point: runs the scan with host id 1 as soon as the file is executed.
main(1)